ANALYSIS OF CUSTOMER LOAN REPAYMENT HISTORY AND


PREDICTING CREDIT RISK SCORE & CUSTOMER SEGMENTATION

BY - SHAMBHURAJ BARGE


Agenda

In this hackhathon,I am going to build a machine learning model on the finantial dataset to predict CreditRiskScore of the home loan borrowers , which is used by many housing finance company to find the quantum of additional finance which can be allowed based on the mortgage. . The credit risk scores are used to evaluate the potential risk posed by lending money to consumers and in turn they can take decisions to mitigate losses due to bad debt..

we have two datasets provides information about demographics and finantial payment history borrowers .

Objectives

  1. to do exploratory Data Analysis using visualizations

  2. to build the analytical framework to predict the credit risk score of each customer using the payment history and other demographic features.

  3. to segment the customers which will help to understand their sensitivity to the interest rate or product features to improve retention ie., to reduce the default or transfer of loan to competition and to understand how to structure offers for better revenue.


Importing The Necessary Libraries

In [390]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
get_ipython().magic('matplotlib inline')
import os
from imblearn.datasets import fetch_datasets
from kmeans_smote import KMeansSMOTE
from collections import Counter
from imblearn.datasets import fetch_datasets
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from imblearn.pipeline import make_pipeline as make_pipeline_imb
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import NearMiss
from imblearn.metrics import classification_report_imbalanced
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from imblearn.over_sampling import SMOTE
from math import *
import pandas as pd
import numpy as np

import tensorflow as tf
import warnings
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import Imputer, StandardScaler
import matplotlib.pyplot as plt
from matplotlib import gridspec

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization, Activation
from keras.optimizers import Adam
from keras import backend as K
from tensorflow.python.client import device_lib

import lightgbm as lgb
import catboost as cb
import xgboost as xgb

import seaborn as sns
sns.set_style("whitegrid")
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 200)
warnings.filterwarnings('ignore')
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score
from sklearn import preprocessing, model_selection, metrics, feature_selection
from sklearn.model_selection import GridSearchCV, learning_curve
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn import neighbors, linear_model, svm, tree, ensemble
from sklearn.ensemble import AdaBoostClassifier
from sklearn.decomposition import PCA
from IPython.display import display, HTML
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode,iplot
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
get_ipython().magic('matplotlib inline')
import os
from imblearn.datasets import fetch_datasets
from kmeans_smote import KMeansSMOTE
from collections import Counter
from imblearn.datasets import fetch_datasets
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from imblearn.pipeline import make_pipeline as make_pipeline_imb
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import NearMiss
from imblearn.metrics import classification_report_imbalanced
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from imblearn.over_sampling import SMOTE
from math import *
import pandas as pd
import numpy as np
import pickle

import tensorflow as tf
import warnings
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import Imputer, StandardScaler
import matplotlib.pyplot as plt
from matplotlib import gridspec

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization, Activation
from keras.optimizers import Adam
from keras import backend as K
from tensorflow.python.client import device_lib

import lightgbm as lgb
import catboost as cb
import xgboost as xgb

import seaborn as sns
sns.set_style("whitegrid")
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 200)
warnings.filterwarnings('ignore')
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score
from sklearn import preprocessing, model_selection, metrics, feature_selection
from sklearn.model_selection import GridSearchCV, learning_curve
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn import neighbors, linear_model, svm, tree, ensemble
from sklearn.ensemble import AdaBoostClassifier
from sklearn.decomposition import PCA
from IPython.display import display, HTML
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode,iplot
import warnings
def ignore_warn(*args, **kwargs):
    pass
warnings.warn = ignore_warn
import numpy as np 
import pandas as pd 
%matplotlib inline
import matplotlib.pyplot as plt 
import seaborn as sns
from scipy import stats
from scipy.stats import norm, skew
from sklearn import preprocessing
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNetCV, ElasticNet
from xgboost import XGBRegressor, plot_importance 
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import StratifiedKFold
pd.set_option('display.float_format', lambda x: '{:.3f}'.format(x))


Set Path

In [391]:
os.chdir("F:\insofe\PHD\data\ml")
In [392]:
pwd()
Out[392]:
'F:\\insofe\\PHD\\data\\ml'


Creating a Pandas DataFrame from a CSV file

In [393]:
#raw train data 1
demographic_raw_dt=pd.read_excel("Train-1557019772293.xlsx")
#raw train data 2
payment_raw_dt=pd.read_excel("Train_PaymentHistory-1557019802161.xlsx")
# raw test demo data
test_demo_dt=pd.read_excel("Test-1557294637283.xlsx")
# raw test payment data
test_payment_dt=pd.read_excel("Test_PaymentHistory-1557294848030.xlsx")
# sample submission file
sample_file=pd.read_csv('samplesubmission-1557545918238.csv')

Data Exploration/Analysis

  • dimension of data set
In [394]:
print("demographic dataset has {} samples with {} features each.".format(*demographic_raw_dt.shape))
print ("payment dataset has {} samples with {} features each.".format(*payment_raw_dt.shape))
demographic dataset has 11093 samples with 9 features each.
payment dataset has 269778 samples with 19 features each.
  • describing data

demographic data

In [395]:
demographic_raw_dt.head()
Out[395]:
CustomerID DOB Salary ProfessionalLicensure UtilitySpending eCommerceAccount SocialMediaAccount NoOfProperties CreditRiskScore
0 C12116 1969-04-22 1238.006 0 492.726 1 No 2 711
1 C12117 1981-04-24 1633.000 1 477.870 1 Yes 1 767
2 C12118 1970-02-10 1489.000 0 351.594 1 Yes 1 827
3 C12119 1976-04-22 1241.000 0 403.590 1 No 1 795
4 C12120 1975-04-24 1074.000 1 406.066 1 No 1 827
In [396]:
demographic_raw_dt.columns
Out[396]:
Index(['CustomerID', 'DOB', 'Salary', 'ProfessionalLicensure',
       'UtilitySpending', 'eCommerceAccount', 'SocialMediaAccount',
       'NoOfProperties', 'CreditRiskScore'],
      dtype='object')
In [397]:
demographic_raw_dt.dtypes
Out[397]:
CustomerID                object
DOB                       object
Salary                   float64
ProfessionalLicensure      int64
UtilitySpending          float64
eCommerceAccount           int64
SocialMediaAccount        object
NoOfProperties             int64
CreditRiskScore            int64
dtype: object

payment data

In [398]:
payment_raw_dt.head(30)
Out[398]:
CustomerID Current_Instalment_Sequence Starting_Instalment Maturity_Period Current_Outstanding Current_Loan_to_Appraisedvalu_Percent CurrentInterestrate RealEstate_Current_Inflation GDP UnemploymentRate Asset_type Urban_Development Villa_House Investment_SelfOccupied Starting_outstanding Starting_Loan_to_Appraisedvalu_Percent StartingInterestrate RealEstate_Starting_Inflation Payment_Status
0 C12116 25 25 113 41303.420 24.498 9.200 2.263 2.899 4.700 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
1 C12116 26 25 113 41061.950 24.484 9.200 2.251 2.151 4.700 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
2 C12116 27 25 113 40804.420 24.627 9.200 2.224 2.362 4.400 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
3 C12116 28 25 113 40483.890 24.736 9.200 2.197 1.229 4.600 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
4 C12116 29 25 113 40367.060 24.925 9.200 2.174 1.693 4.500 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
5 C12116 30 25 113 40127.970 25.318 9.200 2.127 2.274 4.700 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
6 C12116 31 25 113 39718.660 26.566 9.200 2.007 1.851 4.700 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
7 C12116 32 25 113 35877.030 25.873 9.200 1.861 1.104 5.000 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
8 C12116 33 25 113 34410.030 25.584 9.200 1.805 0.837 5.000 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
9 C12116 34 25 113 33590.470 26.008 9.200 1.734 -0.314 5.800 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
10 C12116 35 25 113 32952.480 27.286 9.200 1.621 -2.806 6.500 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
11 C12116 36 25 113 32688.300 28.964 9.200 1.515 -3.517 7.800 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
12 C12116 37 25 113 32388.300 28.348 9.200 1.534 -4.147 9.000 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
13 C12116 38 25 113 32388.300 27.380 9.200 1.588 -3.340 9.500 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
14 C12116 39 25 113 32388.300 27.486 9.200 1.582 -0.241 10.000 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
15 C12116 40 25 113 32388.300 27.829 9.200 1.562 1.586 9.800 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
16 C12116 41 25 113 32024.750 26.688 9.200 1.611 2.682 9.900 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
17 C12116 42 25 113 31652.770 26.388 9.200 1.610 3.029 9.400 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
18 C12116 43 25 113 31272.170 26.899 9.200 1.560 2.694 9.400 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
19 C12116 44 25 113 30585.430 27.203 9.200 1.509 1.876 9.200 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
20 C12116 45 25 113 30116.750 26.101 9.200 1.549 1.639 9.100 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
21 C12116 46 25 113 29658.490 25.582 9.200 1.556 1.176 9.000 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
22 C12116 47 25 113 29231.670 26.230 9.200 1.496 1.668 8.800 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default
23 C12116 48 25 113 29087.210 26.658 9.200 1.464 2.716 8.300 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Default
24 C12117 25 25 138 162452.330 57.099 7.500 2.263 2.899 4.700 No shred services No Yes Self Occupancy 164500.000 70.000 7.500 1.869 Non-Payoff/Non-Default
25 C12117 26 25 138 162045.150 57.257 7.500 2.251 2.151 4.700 No shred services No Yes Self Occupancy 164500.000 70.000 7.500 1.869 Non-Payoff/Non-Default
26 C12117 27 25 138 161630.280 57.806 7.500 2.224 2.362 4.400 No shred services No Yes Self Occupancy 164500.000 70.000 7.500 1.869 Non-Payoff/Non-Default
27 C12117 28 25 138 161349.360 58.420 10.500 2.197 1.229 4.600 No shred services No Yes Self Occupancy 164500.000 70.000 7.500 1.869 Non-Payoff/Non-Default
28 C12117 29 25 138 161025.610 58.920 10.500 2.174 1.693 4.500 No shred services No Yes Self Occupancy 164500.000 70.000 7.500 1.869 Non-Payoff/Non-Default
29 C12117 30 25 138 160859.450 60.143 10.875 2.127 2.274 4.700 No shred services No Yes Self Occupancy 164500.000 70.000 7.500 1.869 Non-Payoff/Non-Default
In [399]:
payment_raw_dt.dtypes
Out[399]:
CustomerID                                 object
Current_Instalment_Sequence                 int64
Starting_Instalment                         int64
Maturity_Period                             int64
Current_Outstanding                       float64
Current_Loan_to_Appraisedvalu_Percent     float64
CurrentInterestrate                       float64
RealEstate_Current_Inflation              float64
GDP                                       float64
UnemploymentRate                          float64
Asset_type                                 object
Urban_Development                          object
Villa_House                                object
Investment_SelfOccupied                    object
Starting_outstanding                      float64
Starting_Loan_to_Appraisedvalu_Percent    float64
StartingInterestrate                      float64
RealEstate_Starting_Inflation             float64
Payment_Status                             object
dtype: object


Columns in the dataset

CustomerID - unique no
Current_Instalment_Sequence -
Starting_Instalment
Maturity_Period-he time between when the bond is issued and when it matures in months.
Current_Outstanding
Current_Loan_to_Appraisedvalu_Percent
CurrentInterestrate
RealEstate_Current_Inflation - property rate in percentage
GDP -is defined as the market value of the goods and services produced by a country.
UnemploymentRate
Asset_type
Urban_Development
Villa_House
Investment_SelfOccupied-'Self Occupancy', 'Investment'
Starting_outstanding - total debt
Starting_Loan_to_Appraisedvalu_Percent -LTV ratio is 75% or lower, you could get a lower rate, because the loan is seen as less risky to the lender. If the value of the home increases after you close on your home purchase, you may be able to refinance to a lower interest rate.
StartingInterestrate
RealEstate_Starting_Inflation
Payment_Status -'Non-Payoff/Non-Default', 'Default', 'Payoff'
Salary
ProfessionalLicensure
UtilitySpending
eCommerceAccount
SocialMediaAccount
DOB NoOfProperties- total properties of borrowers.
CreditRiskScore -calculate the creditworthiness of borrowers. score them bet 300-900. need more than 650 to egligible for loan.


Convert DOB into Age

In [400]:
demographic_raw_dt['DOB']=demographic_raw_dt['DOB'].astype('datetime64')
demographic_raw_dt['age'] = (pd.to_datetime('now') - demographic_raw_dt['DOB']).astype('<m8[Y]')
demographic_raw_dt['age'] = demographic_raw_dt['age'].astype('int')


Remove DOB

In [401]:
demographic_raw_dt=demographic_raw_dt.drop(axis=1,columns='DOB')
combine two dataset on primary key that is CustomerID for doing further analysis
In [402]:
merge_dt=payment_raw_dt.merge(demographic_raw_dt, left_on='CustomerID', right_on='CustomerID')
In [403]:
merge_dt.columns
Out[403]:
Index(['CustomerID', 'Current_Instalment_Sequence', 'Starting_Instalment',
       'Maturity_Period', 'Current_Outstanding',
       'Current_Loan_to_Appraisedvalu_Percent', 'CurrentInterestrate',
       'RealEstate_Current_Inflation', 'GDP', 'UnemploymentRate', 'Asset_type',
       'Urban_Development', 'Villa_House', 'Investment_SelfOccupied',
       'Starting_outstanding', 'Starting_Loan_to_Appraisedvalu_Percent',
       'StartingInterestrate', 'RealEstate_Starting_Inflation',
       'Payment_Status', 'Salary', 'ProfessionalLicensure', 'UtilitySpending',
       'eCommerceAccount', 'SocialMediaAccount', 'NoOfProperties',
       'CreditRiskScore', 'age'],
      dtype='object')
In [404]:
merge_dt.shape
Out[404]:
(269778, 27)
In [405]:
merge_dt.head(5)
Out[405]:
CustomerID Current_Instalment_Sequence Starting_Instalment Maturity_Period Current_Outstanding Current_Loan_to_Appraisedvalu_Percent CurrentInterestrate RealEstate_Current_Inflation GDP UnemploymentRate Asset_type Urban_Development Villa_House Investment_SelfOccupied Starting_outstanding Starting_Loan_to_Appraisedvalu_Percent StartingInterestrate RealEstate_Starting_Inflation Payment_Status Salary ProfessionalLicensure UtilitySpending eCommerceAccount SocialMediaAccount NoOfProperties CreditRiskScore age
0 C12116 25 25 113 41303.420 24.498 9.200 2.263 2.899 4.700 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default 1238.006 0 492.726 1 No 2 711 50
1 C12116 26 25 113 41061.950 24.484 9.200 2.251 2.151 4.700 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default 1238.006 0 492.726 1 No 2 711 50
2 C12116 27 25 113 40804.420 24.627 9.200 2.224 2.362 4.400 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default 1238.006 0 492.726 1 No 2 711 50
3 C12116 28 25 113 40483.890 24.736 9.200 2.197 1.229 4.600 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default 1238.006 0 492.726 1 No 2 711 50
4 C12116 29 25 113 40367.060 24.925 9.200 2.174 1.693 4.500 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default 1238.006 0 492.726 1 No 2 711 50
In [406]:
merge_dt.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 269778 entries, 0 to 269777
Data columns (total 27 columns):
CustomerID                                269778 non-null object
Current_Instalment_Sequence               269778 non-null int64
Starting_Instalment                       269778 non-null int64
Maturity_Period                           269778 non-null int64
Current_Outstanding                       269778 non-null float64
Current_Loan_to_Appraisedvalu_Percent     269778 non-null float64
CurrentInterestrate                       269778 non-null float64
RealEstate_Current_Inflation              269778 non-null float64
GDP                                       269778 non-null float64
UnemploymentRate                          269778 non-null float64
Asset_type                                269778 non-null object
Urban_Development                         269778 non-null object
Villa_House                               269778 non-null object
Investment_SelfOccupied                   269778 non-null object
Starting_outstanding                      269778 non-null float64
Starting_Loan_to_Appraisedvalu_Percent    269778 non-null float64
StartingInterestrate                      269778 non-null float64
RealEstate_Starting_Inflation             269778 non-null float64
Payment_Status                            269778 non-null object
Salary                                    269778 non-null float64
ProfessionalLicensure                     269778 non-null int64
UtilitySpending                           269778 non-null float64
eCommerceAccount                          269778 non-null int64
SocialMediaAccount                        269778 non-null object
NoOfProperties                            269778 non-null int64
CreditRiskScore                           269778 non-null int64
age                                       269778 non-null int32
dtypes: float64(12), int32(1), int64(7), object(7)
memory usage: 56.6+ MB
In [407]:
merge_dt.describe(include='all').T.sort_values("count")
Out[407]:
count unique top freq mean std min 25% 50% 75% max
CustomerID 269778 11093 C13749 51 NaN NaN NaN NaN NaN NaN NaN
NoOfProperties 269778.000 NaN NaN NaN 1.455 0.841 1.000 1.000 1.000 2.000 4.000
SocialMediaAccount 269778 2 Yes 184225 NaN NaN NaN NaN NaN NaN NaN
eCommerceAccount 269778.000 NaN NaN NaN 0.509 0.500 0.000 0.000 1.000 1.000 1.000
UtilitySpending 269778.000 NaN NaN NaN 342.166 90.238 185.701 264.933 341.690 420.922 495.202
ProfessionalLicensure 269778.000 NaN NaN NaN 0.566 0.496 0.000 0.000 1.000 1.000 1.000
Salary 269778.000 NaN NaN NaN 4155.840 8572.512 1000.000 1407.000 1876.889 4129.494 412300.000
Payment_Status 269778 3 Non-Payoff/Non-Default 262230 NaN NaN NaN NaN NaN NaN NaN
RealEstate_Starting_Inflation 269778.000 NaN NaN NaN 1.959 0.350 0.757 1.794 2.127 2.224 2.263
StartingInterestrate 269778.000 NaN NaN NaN 5.745 2.822 0.000 5.250 6.250 7.375 18.000
Starting_Loan_to_Appraisedvalu_Percent 269778.000 NaN NaN NaN 78.131 10.390 50.100 73.200 80.000 80.000 125.000
Starting_outstanding 269778.000 NaN NaN NaN 258672.653 235370.013 10465.200 100000.000 180000.000 368000.000 8000000.000
CreditRiskScore 269778.000 NaN NaN NaN 647.215 144.203 350.000 526.000 676.000 784.000 830.000
Investment_SelfOccupied 269778 2 Self Occupancy 225405 NaN NaN NaN NaN NaN NaN NaN
Urban_Development 269778 2 No 236261 NaN NaN NaN NaN NaN NaN NaN
Asset_type 269778 2 No shred services 250404 NaN NaN NaN NaN NaN NaN NaN
UnemploymentRate 269778.000 NaN NaN NaN 6.882 1.967 3.800 5.000 6.500 9.000 10.000
GDP 269778.000 NaN NaN NaN 1.203 2.040 -4.147 0.893 1.693 2.556 5.132
RealEstate_Current_Inflation 269778.000 NaN NaN NaN 1.792 0.266 1.078 1.562 1.734 2.007 2.263
CurrentInterestrate 269778.000 NaN NaN NaN 6.619 2.030 0.250 5.625 6.500 7.750 37.500
Current_Loan_to_Appraisedvalu_Percent 269778.000 NaN NaN NaN 82.240 25.576 0.000 64.644 82.348 101.714 169.358
Current_Outstanding 269778.000 NaN NaN NaN 244424.458 226337.333 0.000 92942.340 170191.180 346573.312 8701859.240
Maturity_Period 269778.000 NaN NaN NaN 135.278 21.384 29.000 135.000 141.000 145.000 200.000
Starting_Instalment 269778.000 NaN NaN NaN 23.704 5.338 1.000 20.000 24.000 28.000 51.000
Current_Instalment_Sequence 269778.000 NaN NaN NaN 37.645 11.088 1.000 30.000 37.000 46.000 60.000
Villa_House 269778 2 Yes 164385 NaN NaN NaN NaN NaN NaN NaN
age 269778.000 NaN NaN NaN 39.062 3.965 23.000 37.000 38.000 40.000 70.000
  • Observation - ProfessionalLicensure,eCommerceAccount,NoOfProperties are categorical type so need to change its data types
  • observation -CreditRiskScore is target columns and we need to remove before to build the clusters
In [408]:
print(merge_dt.ProfessionalLicensure.unique())
print(merge_dt.eCommerceAccount.unique())
print(merge_dt.NoOfProperties.unique())
[0 1]
[1 0]
[2 1 4 3]
In [409]:
merge_dt['ProfessionalLicensure']=merge_dt['ProfessionalLicensure'].astype('object')
merge_dt['eCommerceAccount']=merge_dt['eCommerceAccount'].astype('object')
merge_dt['NoOfProperties']=merge_dt['NoOfProperties'].astype('object')
In [410]:
merge_dt.Urban_Development.unique()
Out[410]:
array(['No', 'Yes'], dtype=object)
In [411]:
merge_dt.describe(include=['O'])
Out[411]:
CustomerID Asset_type Urban_Development Villa_House Investment_SelfOccupied Payment_Status ProfessionalLicensure eCommerceAccount SocialMediaAccount NoOfProperties
count 269778 269778 269778 269778 269778 269778 269778 269778 269778 269778
unique 11093 2 2 2 2 3 2 2 2 4
top C13749 No shred services No Yes Self Occupancy Non-Payoff/Non-Default 1 1 Yes 1
freq 51 250404 236261 164385 225405 262230 152697 137375 184225 193032
In [412]:
merge_dt.describe(include=['float32','float64','int64','int32'])
Out[412]:
Current_Instalment_Sequence Starting_Instalment Maturity_Period Current_Outstanding Current_Loan_to_Appraisedvalu_Percent CurrentInterestrate RealEstate_Current_Inflation GDP UnemploymentRate Starting_outstanding Starting_Loan_to_Appraisedvalu_Percent StartingInterestrate RealEstate_Starting_Inflation Salary UtilitySpending CreditRiskScore age
count 269778.000 269778.000 269778.000 269778.000 269778.000 269778.000 269778.000 269778.000 269778.000 269778.000 269778.000 269778.000 269778.000 269778.000 269778.000 269778.000 269778.000
mean 37.645 23.704 135.278 244424.458 82.240 6.619 1.792 1.203 6.882 258672.653 78.131 5.745 1.959 4155.840 342.166 647.215 39.062
std 11.088 5.338 21.384 226337.333 25.576 2.030 0.266 2.040 1.967 235370.013 10.390 2.822 0.350 8572.512 90.238 144.203 3.965
min 1.000 1.000 29.000 0.000 0.000 0.250 1.078 -4.147 3.800 10465.200 50.100 0.000 0.757 1000.000 185.701 350.000 23.000
25% 30.000 20.000 135.000 92942.340 64.644 5.625 1.562 0.893 5.000 100000.000 73.200 5.250 1.794 1407.000 264.933 526.000 37.000
50% 37.000 24.000 141.000 170191.180 82.348 6.500 1.734 1.693 6.500 180000.000 80.000 6.250 2.127 1876.889 341.690 676.000 38.000
75% 46.000 28.000 145.000 346573.312 101.714 7.750 2.007 2.556 9.000 368000.000 80.000 7.375 2.224 4129.494 420.922 784.000 40.000
max 60.000 51.000 200.000 8701859.240 169.358 37.500 2.263 5.132 10.000 8000000.000 125.000 18.000 2.263 412300.000 495.202 830.000 70.000
  • observation- all columns have correct data types
  • observation - CreditRiskScore is max 830 means range is bet 300 to 900

Pre-Processing


Correlation Plot

In [413]:
import matplotlib.pyplot as plt
def data_corr(data):
    correlation = data.corr()
    fig, ax = plt.subplots(figsize=(15,15)) 
    sns.heatmap(correlation, annot=True, cbar=True, cmap="RdYlGn") 
num = list(merge_dt.select_dtypes(include=['float32','float64','int64','int32']).columns)
data_corr(merge_dt[num])
  • observation -
  • unemployement and real Estate current inflation shows high co-rrelation
  • highest co-rrelation is 0.98


Check Missing Data

In [414]:
#function to find missing values
def miss_data(x):
    total = x.isnull().sum().sort_values(ascending=False)
    percent = (x.isnull().sum()/x.isnull().count()).sort_values(ascending=False)
    missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
   
    missing_data.index.name = 'column_names'
    missing_data.reset_index(inplace=True)
    sns.set(style="whitegrid")
    sns.set(rc={'figure.figsize':(20,8.27)})
    sns.barplot(missing_data.column_names,missing_data.Percent, alpha=0.9)
    print(plt.title('missing data plot'))
    print(plt.ylabel('percentage of missing data', fontsize=12))
    print(plt.xlabel('column names', fontsize=12))
    print(plt.show())
    print(missing_data)
In [415]:
miss_data(x=merge_dt)
Text(0.5,1,'missing data plot')
Text(0,0.5,'percentage of missing data')
Text(0.5,0,'column names')
None
                              column_names  Total  Percent
0                                      age      0    0.000
1                              Villa_House      0    0.000
2              Current_Instalment_Sequence      0    0.000
3                      Starting_Instalment      0    0.000
4                          Maturity_Period      0    0.000
5                      Current_Outstanding      0    0.000
6    Current_Loan_to_Appraisedvalu_Percent      0    0.000
7                      CurrentInterestrate      0    0.000
8             RealEstate_Current_Inflation      0    0.000
9                                      GDP      0    0.000
10                        UnemploymentRate      0    0.000
11                              Asset_type      0    0.000
12                       Urban_Development      0    0.000
13                 Investment_SelfOccupied      0    0.000
14                         CreditRiskScore      0    0.000
15                    Starting_outstanding      0    0.000
16  Starting_Loan_to_Appraisedvalu_Percent      0    0.000
17                    StartingInterestrate      0    0.000
18           RealEstate_Starting_Inflation      0    0.000
19                          Payment_Status      0    0.000
20                                  Salary      0    0.000
21                   ProfessionalLicensure      0    0.000
22                         UtilitySpending      0    0.000
23                        eCommerceAccount      0    0.000
24                      SocialMediaAccount      0    0.000
25                          NoOfProperties      0    0.000
26                              CustomerID      0    0.000
there is not any NA's present


Remove Duplicates

In [416]:
print('Entrées dupliquées: {}'.format(merge_dt.duplicated().sum()))
merge_dt.drop_duplicates(inplace = True)
Entrées dupliquées: 226

Feature Engineering

Here we can generate three new features from outstanding columns
In [417]:
merge_dt['Appraisal_value']=merge_dt['Starting_outstanding']/(merge_dt['Starting_Loan_to_Appraisedvalu_Percent']/100)
In [418]:
merge_dt['current_Appraisal_value']=merge_dt['Current_Outstanding']/(merge_dt['Current_Loan_to_Appraisedvalu_Percent']/100)
In [419]:
merge_dt['remaining_outstanding']=  merge_dt['Starting_outstanding'] - merge_dt['Current_Outstanding']
In [420]:
new_data=merge_dt[['CustomerID', 'Starting_Instalment','Maturity_Period','Asset_type',
       'Urban_Development', 'Villa_House', 'Investment_SelfOccupied',
       'Starting_outstanding', 'Starting_Loan_to_Appraisedvalu_Percent',
       'StartingInterestrate', 'RealEstate_Starting_Inflation',
        'age', 'Salary', 'ProfessionalLicensure','Payment_Status',
       'UtilitySpending', 'eCommerceAccount', 'SocialMediaAccount','Appraisal_value',
       'NoOfProperties', 'CreditRiskScore']]
In [421]:
new_data=new_data.drop_duplicates()


Create Credit Class

create credit class on credit score for visualization
In [422]:
credit_rate =[]
for i in range(len(new_data.CreditRiskScore)):
    if new_data.CreditRiskScore.iloc[i] > 650:
        credit_rate.append("high_score")
    elif new_data.CreditRiskScore.iloc[i] < 650 and new_data.CreditRiskScore.iloc[i] > 450 :
        credit_rate.append("Medium_score")
    else:
        credit_rate.append("low_score")
        
In [423]:
new_data['credit_class']=credit_rate

EDA


Univariate Analysis

Write function to plot histogram
In [424]:
def histplot(df):
    cols = list(df.select_dtypes(include=['float32','float64','int64','int32']).columns)
    for i in cols:
        carrier_count= df[i].value_counts()
        sns.set(style="darkgrid")
        sns.distplot(new_data[i])
        plt.title('Frequency Distribution of'+i)
        plt.ylabel('Number of Occurrences', fontsize=12)
        plt.xlabel(i, fontsize=12)
        print(plt.show())
In [425]:
histplot(new_data)
None
None
None
None
None
None
None
None
None
None
None
In [426]:
sns.distplot(merge_dt['Current_Loan_to_Appraisedvalu_Percent'])
Out[426]:
<matplotlib.axes._subplots.AxesSubplot at 0x29e2190fc88>
In [427]:
sns.distplot(merge_dt['remaining_outstanding'])
Out[427]:
<matplotlib.axes._subplots.AxesSubplot at 0x29e191d4780>
In [428]:
sns.distplot(merge_dt['UnemploymentRate'])
Out[428]:
<matplotlib.axes._subplots.AxesSubplot at 0x29e219bd3c8>
In [429]:
sns.distplot(merge_dt['GDP'])
Out[429]:
<matplotlib.axes._subplots.AxesSubplot at 0x29e70fbd518>
In [430]:
sns.distplot(merge_dt['CurrentInterestrate'])
Out[430]:
<matplotlib.axes._subplots.AxesSubplot at 0x29e6eef1ba8>
In [431]:
sns.distplot(merge_dt['Current_Outstanding'])
Out[431]:
<matplotlib.axes._subplots.AxesSubplot at 0x29e6e8c1b00>
In [432]:
sns.distplot(merge_dt['Starting_Instalment'])
Out[432]:
<matplotlib.axes._subplots.AxesSubplot at 0x29e61032860>
In [433]:
sns.distplot(merge_dt['Starting_Instalment'])
Out[433]:
<matplotlib.axes._subplots.AxesSubplot at 0x29e5bbeaef0>
In [434]:
sns.distplot(merge_dt['Current_Instalment_Sequence'])
Out[434]:
<matplotlib.axes._subplots.AxesSubplot at 0x29e56fa7cc0>
observation- most of data is skewd need log or square root transformation before applying the model
Write function to plot categorical data
In [435]:
def distributionplot(df):
    cols = list(df.select_dtypes(include=['object']).columns)
    for i in cols:
        carrier_count= df[i].value_counts()
        sns.set(style="darkgrid")
        sns.barplot(carrier_count.index, carrier_count.values, alpha=0.9)
        plt.title('Frequency Distribution of'+i)
        plt.ylabel('Number of Occurrences', fontsize=12)
        plt.xlabel(i, fontsize=12)
        print(plt.show())
In [436]:
merge_data=merge_dt
merge_data=merge_data.drop(axis=1,columns='CustomerID')
distributionplot(merge_data)
None
None
None
None
None
None
None
None
None
observation- many loan payments are late paid


Bivariate Analysis

In [437]:
# Standard plotly imports
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
# Using plotly + cufflinks in offline mode
import cufflinks
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)
In [438]:
demographic_raw_dt.pivot(columns='age', values='CreditRiskScore').iplot(
        kind='box',xTitle='age',
        yTitle='CreditRiskScore',
        title='CreditRiskScore Distribution by age ')
  • Observation- the most of people who get the loan having age bet 35 to 50
  • range bet 36 - 40 age group has very high borrowers because of most of people are finantial strong bet this age group after 50 bank are less interested to give the loan. i think mostly guys are retired from the job."
  • borrowers of age above 40 has very high credit score
In [439]:
color_seq = []
for i in new_data.SocialMediaAccount:
    if i =='Yes':
        color_seq.append('red')
    else:
        color_seq.append('green')
In [440]:
plt.figure(figsize = (10,7))
plt.scatter(x = new_data.Salary,y = new_data.Maturity_Period,color = color_seq)
plt.title('Maturity_Period vs Salary')
plt.xlabel('Salary')
plt.ylabel('Maturity_Period')
plt.show()
observation - we can here three dots having salary more than 200000 are the outliers
In [441]:
new_data.iplot(
    x='Maturity_Period',
    y='age',
    # Specify the category
    categories='credit_class',
    xTitle='Maturity_Period',
    yTitle='age',
    title='Maturity_Period vs age by credit_class')
observation-
  • 1) borrowers who are bet age group 35-40 have maturity period is 122-155 and all are having low credit score
  • 2) age group more than 40 are belong to high score category
In [442]:
new_data.iplot(
    x='Starting_Instalment',
    y='StartingInterestrate',
    # Specify the category
    categories='credit_class',
    xTitle='Starting_Instalment',
    yTitle='StartingInterestrate',
    title='StartingInterestrate vs Starting_Instalment by credit_class ')
observation-
  • 1) medium score people have mostly starting instalment bet 15-35


Univariate Analysis[OUTLIERS]

define function to plot boxplot
In [443]:
def boxplot(df):
    cols = list(df.select_dtypes(include=['float32','float64','int64','int32']).columns)
    for i in cols:
        carrier_count= df[i].value_counts()
        sns.set(style="darkgrid")
        sns.boxplot(df[i])
        plt.title('Frequency Distribution of'+i)
        plt.ylabel('Number of Occurrences', fontsize=12)
        plt.xlabel((i), fontsize=12)
        print(plt.show())
#'float32','float64','int64','int32' 
In [444]:
boxplot(merge_dt)
#sns.boxplot(x=new_data['Starting_Instalment'])
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
Observation -
  • 1) Current interest rate column has one outlier as rate more than 35
  • 2) GDP column has one value greater than 4 and 3 are below 0
  • 3) starting outstanding column has one value at 8000000
  • 4) salary column has three outliers having salary more than 200000
  • 5) one Appraisal value is more than 1.4 e7


Remove Outliers

Current_Outstanding
In [445]:
merge_dt.loc[(merge_dt["Current_Outstanding"] >7000000 )]
Out[445]:
CustomerID Current_Instalment_Sequence Starting_Instalment Maturity_Period Current_Outstanding Current_Loan_to_Appraisedvalu_Percent CurrentInterestrate RealEstate_Current_Inflation GDP UnemploymentRate Asset_type Urban_Development Villa_House Investment_SelfOccupied Starting_outstanding Starting_Loan_to_Appraisedvalu_Percent StartingInterestrate RealEstate_Starting_Inflation Payment_Status Salary ProfessionalLicensure UtilitySpending eCommerceAccount SocialMediaAccount NoOfProperties CreditRiskScore age Appraisal_value current_Appraisal_value remaining_outstanding
73150 C15226 26 26 143 7965171.120 53.665 6.750 2.251 2.151 4.700 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 14842300.557 34828.880
73151 C15226 27 26 143 7943800.070 54.174 6.750 2.224 2.362 4.400 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 14663612.709 56199.930
73152 C15226 28 26 143 7922066.350 54.694 6.750 2.197 1.229 4.600 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 14484265.497 77933.650
73153 C15226 29 26 143 7899963.800 55.119 6.750 2.174 1.693 4.500 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 14332611.604 100036.200
73154 C15226 30 26 143 7877486.180 56.161 6.750 2.127 2.274 4.700 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 14026666.359 122513.820
73155 C15226 31 26 143 7854627.110 59.363 6.750 2.007 1.851 4.700 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 13231472.469 145372.890
73156 C15226 32 26 143 7831380.120 63.815 6.750 1.861 1.104 5.000 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 12272096.755 168619.880
73157 C15226 33 26 143 7807738.620 65.596 6.750 1.805 0.837 5.000 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 11902852.494 192261.380
73158 C15226 34 26 143 7783695.920 68.098 6.750 1.734 -0.314 5.800 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 11430087.967 216304.080
73159 C15226 35 26 143 7759245.220 72.600 6.750 1.621 -2.806 6.500 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 10687643.257 240754.780
73160 C15226 36 26 143 7734380.000 77.436 6.750 1.515 -3.517 7.800 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 9988057.256 265620.000
73161 C15226 37 26 143 7709091.980 76.242 6.750 1.534 -4.147 9.000 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 10111358.464 290908.020
73162 C15226 38 26 143 7709091.980 73.639 6.750 1.588 -3.340 9.500 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 10468734.160 290908.020
73163 C15226 39 26 143 7691995.610 73.759 6.750 1.582 -0.241 10.000 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 10428512.910 308004.390
73164 C15226 40 26 143 7683375.240 74.596 6.750 1.562 1.586 9.800 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 10299936.783 316624.760
73165 C15226 41 26 143 7683375.240 72.350 6.750 1.611 2.682 9.900 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 10619728.688 316624.760
73166 C15226 42 26 143 8701859.240 81.971 4.500 1.610 3.029 9.400 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 10615772.499 -701859.240
73167 C15226 43 26 143 8688100.840 84.443 4.500 1.560 2.694 9.400 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Non-Payoff/Non-Default 167179.426 0 341.690 0 No 1 577 39 14842300.557 10288727.583 -688100.840
73168 C15226 44 26 143 8688100.840 87.313 4.500 1.509 1.876 9.200 No shred services Yes No Self Occupancy 8000000.000 53.900 6.750 2.251 Payoff 167179.426 0 341.690 0 No 1 577 39 14842300.557 9950473.465 -688100.840
In [446]:
merge_dt=merge_dt[merge_dt.CustomerID != 'C15226']
Salary
In [447]:
merge_dt.loc[(merge_dt["Salary"] > 200000 )]
Out[447]:
CustomerID Current_Instalment_Sequence Starting_Instalment Maturity_Period Current_Outstanding Current_Loan_to_Appraisedvalu_Percent CurrentInterestrate RealEstate_Current_Inflation GDP UnemploymentRate Asset_type Urban_Development Villa_House Investment_SelfOccupied Starting_outstanding Starting_Loan_to_Appraisedvalu_Percent StartingInterestrate RealEstate_Starting_Inflation Payment_Status Salary ProfessionalLicensure UtilitySpending eCommerceAccount SocialMediaAccount NoOfProperties CreditRiskScore age Appraisal_value current_Appraisal_value remaining_outstanding
164130 C18942 24 24 133 2700000.000 65.900 4.750 2.237 3.121 4.700 No shred services No Yes Self Occupancy 2700000.000 65.900 4.750 2.237 Non-Payoff/Non-Default 412300.000 1 287.217 0 Yes 1 823 40 4097116.844 4097116.844 0.000
164131 C18942 25 24 133 2700000.000 65.160 4.750 2.263 2.899 4.700 No shred services No Yes Self Occupancy 2700000.000 65.900 4.750 2.237 Non-Payoff/Non-Default 412300.000 1 287.217 0 Yes 1 823 40 4097116.844 4143627.131 0.000
164132 C18942 26 24 133 2700000.000 65.505 4.750 2.251 2.151 4.700 No shred services No Yes Self Occupancy 2700000.000 65.900 4.750 2.237 Non-Payoff/Non-Default 412300.000 1 287.217 0 Yes 1 823 40 4097116.844 4121836.878 0.000
164133 C18942 27 24 133 2700000.000 66.303 4.750 2.224 2.362 4.400 No shred services No Yes Self Occupancy 2700000.000 65.900 4.750 2.237 Non-Payoff/Non-Default 412300.000 1 287.217 0 Yes 1 823 40 4097116.844 4072213.698 0.000
164134 C18942 28 24 133 2700000.000 67.124 4.750 2.197 1.229 4.600 No shred services No Yes Self Occupancy 2700000.000 65.900 4.750 2.237 Non-Payoff/Non-Default 412300.000 1 287.217 0 Yes 1 823 40 4097116.844 4022407.406 0.000
164135 C18942 29 24 133 2700000.000 67.834 4.750 2.174 1.693 4.500 No shred services No Yes Self Occupancy 2700000.000 65.900 4.750 2.237 Non-Payoff/Non-Default 412300.000 1 287.217 0 Yes 1 823 40 4097116.844 3980291.791 0.000
164136 C18942 30 24 133 2700000.000 69.314 4.750 2.127 2.274 4.700 No shred services No Yes Self Occupancy 2700000.000 65.900 4.750 2.237 Non-Payoff/Non-Default 412300.000 1 287.217 0 Yes 1 823 40 4097116.844 3895328.117 0.000
164137 C18942 31 24 133 2700000.000 73.479 4.750 2.007 1.851 4.700 No shred services No Yes Self Occupancy 2700000.000 65.900 4.750 2.237 Non-Payoff/Non-Default 412300.000 1 287.217 0 Yes 1 823 40 4097116.844 3674495.808 0.000
164138 C18942 32 24 133 2700000.000 79.224 4.750 1.861 1.104 5.000 No shred services No Yes Self Occupancy 2700000.000 65.900 4.750 2.237 Non-Payoff/Non-Default 412300.000 1 287.217 0 Yes 1 823 40 4097116.844 3408068.769 0.000
164139 C18942 33 24 133 1300000.000 39.328 4.750 1.805 0.837 5.000 No shred services No Yes Self Occupancy 2700000.000 65.900 4.750 2.237 Payoff 412300.000 1 287.217 0 Yes 1 823 40 4097116.844 3305526.403 1400000.000
190013 C19970 24 24 142 1164982.000 49.163 5.750 2.237 3.121 4.700 No shred services No No Self Occupancy 1164982.000 61.300 5.750 1.794 Non-Payoff/Non-Default 217089.213 0 391.210 1 No 2 815 36 1900460.033 2369617.901 0.000
190014 C19970 25 24 142 1164982.000 48.611 5.750 2.263 2.899 4.700 No shred services No No Self Occupancy 1164982.000 61.300 5.750 1.794 Non-Payoff/Non-Default 217089.213 0 391.210 1 No 2 815 36 1900460.033 2396517.697 0.000
190015 C19970 26 24 142 800000.000 33.558 5.750 2.251 2.151 4.700 No shred services No No Self Occupancy 1164982.000 61.300 5.750 1.794 Non-Payoff/Non-Default 217089.213 0 391.210 1 No 2 815 36 1900460.033 2383915.037 364982.000
190016 C19970 27 24 142 800000.000 33.967 5.750 2.224 2.362 4.400 No shred services No No Self Occupancy 1164982.000 61.300 5.750 1.794 Non-Payoff/Non-Default 217089.213 0 391.210 1 No 2 815 36 1900460.033 2355214.860 364982.000
190017 C19970 28 24 142 800000.000 34.388 5.750 2.197 1.229 4.600 No shred services No No Self Occupancy 1164982.000 61.300 5.750 1.794 Non-Payoff/Non-Default 217089.213 0 391.210 1 No 2 815 36 1900460.033 2326408.779 364982.000
190018 C19970 29 24 142 206631.470 8.976 5.750 2.174 1.693 4.500 No shred services No No Self Occupancy 1164982.000 61.300 5.750 1.794 Non-Payoff/Non-Default 217089.213 0 391.210 1 No 2 815 36 1900460.033 2302050.695 958350.530
190019 C19970 30 24 142 206631.470 9.172 5.750 2.127 2.274 4.700 No shred services No No Self Occupancy 1164982.000 61.300 5.750 1.794 Non-Payoff/Non-Default 217089.213 0 391.210 1 No 2 815 36 1900460.033 2252910.910 958350.530
190020 C19970 31 24 142 206631.470 9.723 5.750 2.007 1.851 4.700 No shred services No No Self Occupancy 1164982.000 61.300 5.750 1.794 Non-Payoff/Non-Default 217089.213 0 391.210 1 No 2 815 36 1900460.033 2125189.829 958350.530
190021 C19970 32 24 142 206631.470 10.483 5.750 1.861 1.104 5.000 No shred services No No Self Occupancy 1164982.000 61.300 5.750 1.794 Non-Payoff/Non-Default 217089.213 0 391.210 1 No 2 815 36 1900460.033 1971098.475 958350.530
190022 C19970 33 24 142 206631.470 10.808 5.750 1.805 0.837 5.000 No shred services No No Self Occupancy 1164982.000 61.300 5.750 1.794 Non-Payoff/Non-Default 217089.213 0 391.210 1 No 2 815 36 1900460.033 1911791.837 958350.530
190023 C19970 34 24 142 206631.470 11.255 5.750 1.734 -0.314 5.800 No shred services No No Self Occupancy 1164982.000 61.300 5.750 1.794 Non-Payoff/Non-Default 217089.213 0 391.210 1 No 2 815 36 1900460.033 1835858.159 958350.530
190024 C19970 35 24 142 206631.470 12.037 5.750 1.621 -2.806 6.500 No shred services No No Self Occupancy 1164982.000 61.300 5.750 1.794 Non-Payoff/Non-Default 217089.213 0 391.210 1 No 2 815 36 1900460.033 1716609.455 958350.530
190025 C19970 36 24 142 206631.470 12.880 5.750 1.515 -3.517 7.800 No shred services No No Self Occupancy 1164982.000 61.300 5.750 1.794 Non-Payoff/Non-Default 217089.213 0 391.210 1 No 2 815 36 1900460.033 1604244.557 958350.530
190026 C19970 37 24 142 206631.470 12.723 5.750 1.534 -4.147 9.000 No shred services No No Self Occupancy 1164982.000 61.300 5.750 1.794 Payoff 217089.213 0 391.210 1 No 2 815 36 1900460.033 1624048.738 958350.530
235060 C21710 31 31 150 2950000.000 108.322 6.250 2.007 1.851 4.700 No shred services No Yes Self Occupancy 2950000.000 100.000 6.250 2.174 Non-Payoff/Non-Default 353663.571 1 391.210 0 Yes 1 540 40 2950000.000 2723358.789 0.000
235061 C21710 32 31 150 2950000.000 116.790 6.250 1.861 1.104 5.000 No shred services No Yes Self Occupancy 2950000.000 100.000 6.250 2.174 Non-Payoff/Non-Default 353663.571 1 391.210 0 Yes 1 540 40 2950000.000 2525895.938 0.000
235062 C21710 33 31 150 2950000.000 120.413 6.250 1.805 0.837 5.000 No shred services No Yes Self Occupancy 2950000.000 100.000 6.250 2.174 Non-Payoff/Non-Default 353663.571 1 391.210 0 Yes 1 540 40 2950000.000 2449896.490 0.000
235063 C21710 34 31 150 2950000.000 125.394 6.250 1.734 -0.314 5.800 No shred services No Yes Self Occupancy 2950000.000 100.000 6.250 2.174 Non-Payoff/Non-Default 353663.571 1 391.210 0 Yes 1 540 40 2950000.000 2352590.054 0.000
235064 C21710 35 31 150 1950000.000 88.645 6.250 1.621 -2.806 6.500 No shred services No Yes Self Occupancy 2950000.000 100.000 6.250 2.174 Non-Payoff/Non-Default 353663.571 1 391.210 0 Yes 1 540 40 2950000.000 2199776.878 1000000.000
235065 C21710 36 31 150 1917500.000 93.273 6.250 1.515 -3.517 7.800 No shred services No Yes Self Occupancy 2950000.000 100.000 6.250 2.174 Non-Payoff/Non-Default 353663.571 1 391.210 0 Yes 1 540 40 2950000.000 2055785.067 1032500.000
235066 C21710 37 31 150 1917500.000 92.136 6.250 1.534 -4.147 9.000 No shred services No Yes Self Occupancy 2950000.000 100.000 6.250 2.174 Non-Payoff/Non-Default 353663.571 1 391.210 0 Yes 1 540 40 2950000.000 2081163.454 1032500.000
235067 C21710 38 31 150 1917500.000 88.991 6.250 1.588 -3.340 9.500 No shred services No Yes Self Occupancy 2950000.000 100.000 6.250 2.174 Non-Payoff/Non-Default 353663.571 1 391.210 0 Yes 1 540 40 2950000.000 2154720.063 1032500.000
235068 C21710 39 31 150 1857000.000 86.515 6.250 1.582 -0.241 10.000 No shred services No Yes Self Occupancy 2950000.000 100.000 6.250 2.174 Non-Payoff/Non-Default 353663.571 1 391.210 0 Yes 1 540 40 2950000.000 2146441.551 1093000.000
235069 C21710 40 31 150 1857000.000 87.595 6.250 1.562 1.586 9.800 No shred services No Yes Self Occupancy 2950000.000 100.000 6.250 2.174 Payoff 353663.571 1 391.210 0 Yes 1 540 40 2950000.000 2119977.458 1093000.000
In [448]:
merge_dt=merge_dt[merge_dt.CustomerID !='C18942']
merge_dt=merge_dt[merge_dt.CustomerID !='C19970']
merge_dt=merge_dt[merge_dt.CustomerID !='C21710']
Appraisal_value
In [449]:
merge_dt.loc[(merge_dt["Appraisal_value"] > 8.097117e+06 )]## showing nothing means its already dropped
Out[449]:
CustomerID Current_Instalment_Sequence Starting_Instalment Maturity_Period Current_Outstanding Current_Loan_to_Appraisedvalu_Percent CurrentInterestrate RealEstate_Current_Inflation GDP UnemploymentRate Asset_type Urban_Development Villa_House Investment_SelfOccupied Starting_outstanding Starting_Loan_to_Appraisedvalu_Percent StartingInterestrate RealEstate_Starting_Inflation Payment_Status Salary ProfessionalLicensure UtilitySpending eCommerceAccount SocialMediaAccount NoOfProperties CreditRiskScore age Appraisal_value current_Appraisal_value remaining_outstanding
define function to remove outliers in GDP column
In [450]:
def remove_outlier(df_in, col_name):
    q1 = df_in[col_name].quantile(0.25)
    q3 = df_in[col_name].quantile(0.75)
    iqr = q3-q1 #Interquartile range
    fence_low  = q1-1.5*iqr
    fence_high = q3+1.5*iqr
    df_out = df_in.loc[(df_in[col_name] > fence_low) & (df_in[col_name] < fence_high)]
    return df_out
In [451]:
merge_dt=remove_outlier(df_in=merge_dt,col_name='GDP')
In [452]:
merge_dt=remove_outlier(df_in=merge_dt,col_name='Current_Instalment_Sequence')
In [453]:
merge_dt.shape
Out[453]:
(233335, 30)


CLUSTER ANALYSIS

In [454]:
cluster_data=merge_dt
Drop CustomerID
In [455]:
cluster_data=cluster_data.drop(axis=1,columns="CustomerID")
In [456]:
#cluster_data =merge_dt.set_index("CustomerID") #assign customer id as index to dataset to visualize the clusters
In [457]:
cluster_data.head(2)
Out[457]:
Current_Instalment_Sequence Starting_Instalment Maturity_Period Current_Outstanding Current_Loan_to_Appraisedvalu_Percent CurrentInterestrate RealEstate_Current_Inflation GDP UnemploymentRate Asset_type Urban_Development Villa_House Investment_SelfOccupied Starting_outstanding Starting_Loan_to_Appraisedvalu_Percent StartingInterestrate RealEstate_Starting_Inflation Payment_Status Salary ProfessionalLicensure UtilitySpending eCommerceAccount SocialMediaAccount NoOfProperties CreditRiskScore age Appraisal_value current_Appraisal_value remaining_outstanding
0 25 25 113 41303.420 24.498 9.200 2.263 2.899 4.700 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default 1238.006 0 492.726 1 No 2 711 50 64841.499 168596.837 3696.580
1 26 25 113 41061.950 24.484 9.200 2.251 2.151 4.700 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 Non-Payoff/Non-Default 1238.006 0 492.726 1 No 2 711 50 64841.499 167710.230 3938.050
In [458]:
cluster_data=cluster_data.drop(axis=1,columns='CreditRiskScore')
In [459]:
cluster_data=cluster_data.fillna(value=cluster_data.current_Appraisal_value.mean())
change datatypes
In [460]:
cluster_data['Appraisal_value']=cluster_data['Appraisal_value'].astype('int')
In [461]:
cluster_data['current_Appraisal_value']= cluster_data['current_Appraisal_value'].astype('int')
In [462]:
cluster_data['NoOfProperties']= cluster_data['NoOfProperties'].astype('object')
List out categorical and numerical data
In [463]:
cat_cols = list(cluster_data.select_dtypes(include=['object']).columns)
num_cols = list(cluster_data.select_dtypes(include=['float64','float32','int32','int64']).columns)
In [464]:
cat_cols
Out[464]:
['Asset_type',
 'Urban_Development',
 'Villa_House',
 'Investment_SelfOccupied',
 'Payment_Status',
 'SocialMediaAccount',
 'NoOfProperties']


Dummification

In [465]:
dummies_df=pd.get_dummies(cluster_data[cat_cols],drop_first=True)
In [466]:
numerical_data = cluster_data[num_cols]
In [467]:
numerical_data['index']=range(233335)
dummies_df['index']=range(233335)
In [468]:
final_data1=numerical_data.merge(dummies_df, left_on='index', right_on='index')

final_data1=final_data1.drop(axis=1,columns='index')
In [500]:
final_data1.to_csv('final_data.csv')
Find optimum no of K
In [469]:
train_matrix1 = final_data1.as_matrix()
In [470]:
Sum_of_squared_distances = []
K = range(1,10)
for k in K:
    km = KMeans(n_clusters=k)
    km = km.fit(train_matrix1)
    Sum_of_squared_distances.append(km.inertia_)
In [471]:
plt.plot(K, Sum_of_squared_distances, 'bx-')
plt.xlabel('k')
plt.ylabel('Sum_of_squared_distances')
plt.title('Elbow Method For Optimal k')
plt.show()
observation- after K- 4 the SSD reamain constant

create K-means cluster with k - 4

In [ ]:
kmeans1 = KMeans(n_clusters = 4)
kmeans1.fit(train_matrix1)
In [473]:
clusters=kmeans1.labels_
In [474]:
cluster_class=clusters.tolist()
In [475]:
len(cluster_class)
Out[475]:
233335
In [476]:
final_data1.shape
Out[476]:
(233335, 31)
In [477]:
cluster_dt=merge_dt
In [478]:
cluster_dt['Cluster_class']=cluster_class

Customer Segmentation

In [479]:
cluster_dt.Cluster_class.value_counts()
Out[479]:
0    142465
1     59411
3     27746
2      3713
Name: Cluster_class, dtype: int64

0bservation- cluster 2 is small cluster than others

create credit class

In [47]:
credit_rate =[]
for i in range(len(cluster_dt.CreditRiskScore)):
    if cluster_dt.CreditRiskScore.iloc[i] > 650:
        credit_rate.append("high_score")
    elif cluster_dt.CreditRiskScore.iloc[i] < 650 and cluster_dt.CreditRiskScore.iloc[i] > 450 :
        credit_rate.append("Medium_score")
    else:
        credit_rate.append("low_score")
        
In [48]:
cluster_dt['credit_class']=credit_rate

create seperate dataframe for each cluster

In [49]:
cust_seg_1=cluster_dt[cluster_dt.Cluster_class == 0]
cust_seg_2=cluster_dt[cluster_dt.Cluster_class == 1]
cust_seg_3=cluster_dt[cluster_dt.Cluster_class == 2]
cust_seg_4=cluster_dt[cluster_dt.Cluster_class == 3]
In [60]:
################### 1 #########################################
cust_high_1=cust_seg_1[cust_seg_1.credit_class == 'high_score']
cust_medium_1=cust_seg_1[cust_seg_1.credit_class == 'Medium_score']
cust_low_1=cust_seg_1[cust_seg_1.credit_class == 'low_score']

################### 2 ###########################################

cust_high_2=cust_seg_2[cust_seg_2.credit_class == 'high_score']
cust_medium_2=cust_seg_2[cust_seg_2.credit_class == 'Medium_score']
cust_low_2=cust_seg_2[cust_seg_2.credit_class == 'low_score']

################### 3 ###########################################

cust_high_3=cust_seg_3[cust_seg_3.credit_class == 'high_score']
cust_medium_3=cust_seg_3[cust_seg_3.credit_class == 'Medium_score']
cust_low_3=cust_seg_3[cust_seg_3.credit_class == 'low_score']

################### 4 ###########################################

cust_high_4=cust_seg_4[cust_seg_4.credit_class == 'high_score']
cust_medium_4=cust_seg_4[cust_seg_4.credit_class == 'Medium_score']
cust_low_4=cust_seg_4[cust_seg_4.credit_class == 'low_score']
In [52]:
cluster_dt.credit_class.value_counts()
Out[52]:
high_score      132727
Medium_score     69836
low_score        30772
Name: credit_class, dtype: int64
customer seg -2
In [63]:
score_high = cust_high_1['CurrentInterestrate']
score_medium = cust_medium_1['CurrentInterestrate']
score_low = cust_low_1['CurrentInterestrate']
legend = ['High', 'Medium','Low']
plt.hist([score_high, score_medium ,score_low], color=['orange', 'green','blue'])
plt.xlabel("CurrentInterestrate")
plt.ylabel("Frequency")
plt.legend(legend)
#plt.xticks(range(0, 7))
#plt.yticks(range(1, 20))
plt.title('Histogram of CurrentInterestrate for clust 1')
plt.show()
customer seg -3
In [62]:
score_high = cust_high_2['CurrentInterestrate']
score_medium = cust_medium_2['CurrentInterestrate']
score_low = cust_low_2['CurrentInterestrate']
legend = ['High', 'Medium','Low']
plt.hist([score_high, score_medium ,score_low], color=['orange', 'green','blue'])
plt.xlabel("CurrentInterestrate")
plt.ylabel("Frequency")
plt.legend(legend)
#plt.xticks(range(0, 7))
#plt.yticks(range(1, 20))
plt.title('Histogram of CurrentInterestrate for clust 2')
plt.show()
customer seg -4
In [64]:
score_high = cust_high_3['CurrentInterestrate']
score_medium = cust_medium_3['CurrentInterestrate']
score_low = cust_low_3['CurrentInterestrate']
legend = ['High', 'Medium','Low']
plt.hist([score_high, score_medium ,score_low], color=['orange', 'green','blue'])
plt.xlabel("CurrentInterestrate")
plt.ylabel("Frequency")
plt.legend(legend)
#plt.xticks(range(0, 7))
#plt.yticks(range(1, 20))
plt.title('Histogram of CurrentInterestrate for clust 3')
plt.show()
customer seg -1
In [65]:
score_high = cust_high_4['CurrentInterestrate']
score_medium = cust_medium_4['CurrentInterestrate']
score_low = cust_low_4['CurrentInterestrate']
legend = ['High', 'Medium','Low']
plt.hist([score_high, score_medium ,score_low], color=['orange', 'green','blue'])
plt.xlabel("CurrentInterestrate")
plt.ylabel("Frequency")
plt.legend(legend)
#plt.xticks(range(0, 7))
#plt.yticks(range(1, 20))
plt.title('Histogram of CurrentInterestrate for clust 3')
plt.show()
In [41]:
cust_seg_1[['CurrentInterestrate','CreditRiskScore']].describe()
Out[41]:
CurrentInterestrate CreditRiskScore
count 142465.000 142465.000
mean 7.152 660.399
std 2.099 140.895
min 1.200 350.000
25% 6.000 554.000
50% 7.000 692.000
75% 8.300 789.000
max 37.500 830.000
In [43]:
cust_seg_2[['CurrentInterestrate','CreditRiskScore']].describe()
Out[43]:
CurrentInterestrate CreditRiskScore
count 59411.000 59411.000
mean 5.738 626.623
std 1.784 145.541
min 0.250 350.000
25% 4.875 498.000
50% 5.875 647.000
75% 6.750 765.000
max 37.500 830.000
In [44]:
cust_seg_3[['CurrentInterestrate','CreditRiskScore']].describe()
Out[44]:
CurrentInterestrate CreditRiskScore
count 3713.000 3713.000
mean 5.225 691.463
std 1.435 126.409
min 2.000 352.000
25% 4.125 621.000
50% 5.625 734.000
75% 6.125 794.000
max 10.375 829.000
In [46]:
cust_seg_4[['CurrentInterestrate','CreditRiskScore']].describe()
Out[46]:
CurrentInterestrate CreditRiskScore
count 27746.000 27746.000
mean 5.547 655.357
std 1.477 145.717
min 1.000 350.000
25% 4.875 531.000
50% 5.750 694.000
75% 6.375 791.000
max 14.000 830.000


K-means clustering on PCA Data

In [239]:
pca = PCA()
pca.fit(train_matrix2)
pca_samples = pca.transform(scaled_matrix)
In [243]:
fig, ax = plt.subplots(figsize=(14, 5))
sns.set(font_scale=1)
plt.step(range(matrix.shape[1]), pca.explained_variance_ratio_.cumsum(), where='mid',
         label='cumulative explained variance')
sns.barplot(np.arange(1,matrix.shape[1]+1), pca.explained_variance_ratio_, alpha=0.5, color = 'g',
            label='individual explained variance')
plt.xlim(0, 31)

ax.set_xticklabels([s if int(s.get_text())%2 == 0 else '' for s in ax.get_xticklabels()])

plt.ylabel('Explained variance', fontsize = 14)
plt.xlabel('Principal components', fontsize = 14)
plt.legend(loc='best', fontsize = 13);
In [ ]:
X_normalized = StandardScaler().fit(X_cancer).transform(X_cancer)  

pca = PCA(n_components = 2).fit(X_normalized)

pca_df = pca.transform(X_normalized)
In [97]:
Sum_of_squared_distances = []
K = range(1,10)
for k in K:
    km = KMeans(n_clusters=k,init='k-means++',n_init=100)
    km = km.fit(pca_df)
    Sum_of_squared_distances.append(km.inertia_)
In [98]:
plt.plot(K, Sum_of_squared_distances, 'bx-')
plt.xlabel('k')
plt.ylabel('Sum_of_squared_distances')
plt.title('Elbow Method For Optimal k')
plt.show()
In [107]:
%matplotlib notebook
import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
from adspy_shared_utilities import plot_labelled_scatter

kmeans2 = KMeans(n_clusters = 3, random_state = 0)
kmeans2.fit(pca_df)

plot_labelled_scatter(pca_df, kmeans2.labels_, 
                      ['Cluster 1', 'Cluster 2', 'Cluster 3'])


BUILDING ML MODEL

Implemented Models

7) Stacking-(Decision-Tree,SVM,Xg boost,RF,Light GBM,extra trees)

8) AdaBoost-AdaBoost)

11) XGBoost-XGBoost on PCA)

Also Used

In [65]:
merge_dt.columns
Out[65]:
Index(['CustomerID', 'Current_Instalment_Sequence', 'Starting_Instalment',
       'Maturity_Period', 'Current_Outstanding',
       'Current_Loan_to_Appraisedvalu_Percent', 'CurrentInterestrate',
       'RealEstate_Current_Inflation', 'GDP', 'UnemploymentRate', 'Asset_type',
       'Urban_Development', 'Villa_House', 'Investment_SelfOccupied',
       'Starting_outstanding', 'Starting_Loan_to_Appraisedvalu_Percent',
       'StartingInterestrate', 'RealEstate_Starting_Inflation',
       'Payment_Status', 'Salary', 'ProfessionalLicensure', 'UtilitySpending',
       'eCommerceAccount', 'SocialMediaAccount', 'NoOfProperties',
       'CreditRiskScore', 'age', 'Appraisal_value', 'current_Appraisal_value',
       'remaining_outstanding'],
      dtype='object')


Data Needs Grouping

In [66]:
merge_dt_1=merge_dt[['CustomerID','Current_Instalment_Sequence', 'Current_Outstanding','Current_Loan_to_Appraisedvalu_Percent','CurrentInterestrate',
                    'RealEstate_Current_Inflation','GDP', 'UnemploymentRate','current_Appraisal_value','remaining_outstanding']]
In [67]:
merge_dt_1.shape
Out[67]:
(233335, 10)


Data Don't Need Grouping

In [68]:
merge_dt_2=merge_dt[['CustomerID', 'Starting_Instalment','Maturity_Period','Asset_type',
       'Urban_Development', 'Villa_House', 'Investment_SelfOccupied',
       'Starting_outstanding', 'Starting_Loan_to_Appraisedvalu_Percent',
       'StartingInterestrate', 'RealEstate_Starting_Inflation',
        'age', 'Salary', 'ProfessionalLicensure',
       'UtilitySpending', 'eCommerceAccount', 'SocialMediaAccount','Appraisal_value',
       'NoOfProperties', 'CreditRiskScore']]
In [69]:
merge_dt_2=merge_dt_2.drop_duplicates()# drop duplicates from dataset
In [70]:
merge_dt_2.shape
Out[70]:
(11089, 20)


Function To Group The Data


First Approach

Build the model on whole dataset with out grouping and find the AVg of prediction at last for each group


Second Approach

Descriptive statistics involves summarizing and organizing the data so they can be easily understood. Descriptive statistics, unlike inferential statistics, seeks to describe the data
In [71]:
def feat_eng(data):
    df = pd.DataFrame()
    for col in data.columns:
        if col in ['CustomerID']:
            continue
        df[col + '_mean'] = data.groupby(['CustomerID'])[col].mean()
        df[col + '_median'] = data.groupby(['CustomerID'])[col].median()
        df[col + '_max'] = data.groupby(['CustomerID'])[col].max()
        df[col + '_min'] = data.groupby(['CustomerID'])[col].min()
        df[col + '_std'] = data.groupby(['CustomerID'])[col].std()
        #df[col + '_kurtosis'] = data.groupby(['CustomerID'])[col].kurtosis()
        df[col + '_skew'] = data.groupby(['CustomerID'])[col].skew() 
        df[col + '_range'] = df[col + '_max'] - df[col + '_min']
        df[col + '_kurtosis'] = data.groupby(['CustomerID'])[col].apply(lambda x: x.kurtosis())
        df[col + '_maxtoMin'] = df[col + '_max'] / df[col + '_min']
        #df[col + '_coeffvar'] = data.groupby(['CustomerID'])[col].apply(lambda x: np.std(x) / np.mean(x))
        #in statistics, the median absolute deviation (MAD) is a robust measure of the variablility of a univariate sample of quantitative data.
        df[col + '_meanAD'] = data.groupby(['CustomerID'])[col].apply(lambda x: np.mean(np.abs(np.diff(x))))
        df[col + '_mad'] = data.groupby(['CustomerID'])[col].apply(lambda x: np.median(np.abs(np.diff(x))))
        df[col + '_abs_max'] = data.groupby(['CustomerID'])[col].apply(lambda x: np.max(np.abs(x)))
        df[col + '_abs_min'] = data.groupby(['CustomerID'])[col].apply(lambda x: np.min(np.abs(x)))
        df[col + '_abs_avg'] = (df[col + '_abs_min'] + df[col + '_abs_max'])/2
    return df 
In [72]:
final_dt_1=feat_eng(data=merge_dt_1)
In [73]:
final_dt_1.head(10)
Out[73]:
Current_Instalment_Sequence_mean Current_Instalment_Sequence_median Current_Instalment_Sequence_max Current_Instalment_Sequence_min Current_Instalment_Sequence_std Current_Instalment_Sequence_skew Current_Instalment_Sequence_range Current_Instalment_Sequence_kurtosis Current_Instalment_Sequence_maxtoMin Current_Instalment_Sequence_meanAD Current_Instalment_Sequence_mad Current_Instalment_Sequence_abs_max Current_Instalment_Sequence_abs_min Current_Instalment_Sequence_abs_avg Current_Outstanding_mean Current_Outstanding_median Current_Outstanding_max Current_Outstanding_min Current_Outstanding_std Current_Outstanding_skew Current_Outstanding_range Current_Outstanding_kurtosis Current_Outstanding_maxtoMin Current_Outstanding_meanAD Current_Outstanding_mad Current_Outstanding_abs_max Current_Outstanding_abs_min Current_Outstanding_abs_avg Current_Loan_to_Appraisedvalu_Percent_mean Current_Loan_to_Appraisedvalu_Percent_median Current_Loan_to_Appraisedvalu_Percent_max Current_Loan_to_Appraisedvalu_Percent_min Current_Loan_to_Appraisedvalu_Percent_std Current_Loan_to_Appraisedvalu_Percent_skew Current_Loan_to_Appraisedvalu_Percent_range Current_Loan_to_Appraisedvalu_Percent_kurtosis Current_Loan_to_Appraisedvalu_Percent_maxtoMin Current_Loan_to_Appraisedvalu_Percent_meanAD Current_Loan_to_Appraisedvalu_Percent_mad Current_Loan_to_Appraisedvalu_Percent_abs_max Current_Loan_to_Appraisedvalu_Percent_abs_min Current_Loan_to_Appraisedvalu_Percent_abs_avg CurrentInterestrate_mean CurrentInterestrate_median CurrentInterestrate_max CurrentInterestrate_min CurrentInterestrate_std CurrentInterestrate_skew CurrentInterestrate_range CurrentInterestrate_kurtosis CurrentInterestrate_maxtoMin CurrentInterestrate_meanAD CurrentInterestrate_mad CurrentInterestrate_abs_max CurrentInterestrate_abs_min CurrentInterestrate_abs_avg RealEstate_Current_Inflation_mean RealEstate_Current_Inflation_median RealEstate_Current_Inflation_max RealEstate_Current_Inflation_min RealEstate_Current_Inflation_std RealEstate_Current_Inflation_skew RealEstate_Current_Inflation_range RealEstate_Current_Inflation_kurtosis RealEstate_Current_Inflation_maxtoMin RealEstate_Current_Inflation_meanAD RealEstate_Current_Inflation_mad RealEstate_Current_Inflation_abs_max RealEstate_Current_Inflation_abs_min RealEstate_Current_Inflation_abs_avg GDP_mean GDP_median GDP_max GDP_min GDP_std GDP_skew GDP_range GDP_kurtosis GDP_maxtoMin GDP_meanAD GDP_mad GDP_abs_max GDP_abs_min GDP_abs_avg UnemploymentRate_mean UnemploymentRate_median UnemploymentRate_max UnemploymentRate_min UnemploymentRate_std UnemploymentRate_skew UnemploymentRate_range UnemploymentRate_kurtosis UnemploymentRate_maxtoMin UnemploymentRate_meanAD UnemploymentRate_mad UnemploymentRate_abs_max UnemploymentRate_abs_min UnemploymentRate_abs_avg current_Appraisal_value_mean current_Appraisal_value_median current_Appraisal_value_max current_Appraisal_value_min current_Appraisal_value_std current_Appraisal_value_skew current_Appraisal_value_range current_Appraisal_value_kurtosis current_Appraisal_value_maxtoMin current_Appraisal_value_meanAD current_Appraisal_value_mad current_Appraisal_value_abs_max current_Appraisal_value_abs_min current_Appraisal_value_abs_avg remaining_outstanding_mean remaining_outstanding_median remaining_outstanding_max remaining_outstanding_min remaining_outstanding_std remaining_outstanding_skew remaining_outstanding_range remaining_outstanding_kurtosis remaining_outstanding_maxtoMin remaining_outstanding_meanAD remaining_outstanding_mad remaining_outstanding_abs_max remaining_outstanding_abs_min remaining_outstanding_abs_avg
CustomerID
C12116 36.500 36.500 48 25 7.763 0.000 23 -1.571 1.920 1.211 1.000 48 25 36.500 34807.537 32989.385 41303.420 29087.210 4632.101 0.327 12216.210 -1.670 1.420 642.958 380.600 41303.420 29087.210 35195.315 25.984 26.055 27.829 24.484 1.007 0.026 3.345 -0.908 1.137 0.541 0.424 27.829 24.484 26.156 9.200 9.200 9.200 9.200 0.000 0.000 0.000 0.000 1.000 0.000 0.000 9.200 9.200 9.200 1.807 1.672 2.263 1.464 0.299 0.476 0.798 -1.568 1.545 0.052 0.046 2.263 1.464 1.864 1.746 1.772 3.029 -0.314 0.935 -0.803 3.344 0.412 -9.634 0.656 0.492 3.029 0.241 1.635 7.050 7.050 10.000 4.400 2.343 0.036 5.600 -2.070 2.273 0.421 0.200 10.000 4.400 7.200 134633.839 124575.695 168596.837 109112.231 22269.436 0.476 59484.606 -1.568 1.545 3879.738 3457.021 168596.837 109112.231 138854.534 10192.463 12010.615 15912.790 3696.580 4632.101 -0.327 12216.210 -1.670 4.305 642.958 380.600 15912.790 3696.580 9804.685
C12117 29.500 29.500 34 25 3.028 0.000 9 -1.200 1.360 1.000 1.000 34 25 29.500 161086.428 160942.530 162452.330 160044.340 781.525 0.460 2407.990 -0.659 1.015 267.554 280.920 162452.330 160044.340 161248.335 62.596 59.531 73.431 57.099 6.122 0.869 16.333 -0.929 1.286 1.815 1.223 73.431 57.099 65.265 9.688 10.500 10.880 7.500 1.541 -0.898 3.380 -1.333 1.451 0.487 0.005 10.880 7.500 9.190 2.064 2.151 2.263 1.734 0.198 -0.707 0.529 -1.233 1.305 0.059 0.046 2.263 1.734 1.998 1.609 1.772 2.899 -0.314 0.927 -0.822 3.214 0.840 -9.220 0.636 0.581 2.899 0.314 1.607 4.810 4.700 5.800 4.400 0.396 1.952 1.400 4.594 1.318 0.211 0.200 5.800 4.400 5.100 259530.977 270380.129 284512.065 217951.153 24948.281 -0.707 66560.912 -1.233 1.305 7395.657 5833.824 284512.065 217951.153 251231.609 3413.572 3557.470 4455.660 2047.670 781.525 -0.460 2407.990 -0.659 2.176 267.554 280.920 4455.660 2047.670 3251.665
C12118 43.000 44.000 60 25 11.039 -0.136 35 -1.276 2.400 1.167 1.000 60 25 42.500 38846.072 39134.850 41740.320 34562.300 2256.890 -0.392 7178.020 -1.086 1.208 239.267 221.695 41740.320 34562.300 38151.310 20.430 20.484 24.220 16.898 2.658 0.039 7.322 -1.719 1.433 0.609 0.591 24.220 16.898 20.559 12.960 12.125 15.000 12.125 1.224 0.903 2.875 -1.129 1.237 0.133 0.000 15.000 12.125 13.562 1.793 1.800 2.263 1.464 0.250 0.607 0.798 -0.824 1.545 0.048 0.040 2.263 1.464 1.864 1.873 1.876 3.029 -0.314 0.862 -0.904 3.344 0.582 -9.634 0.616 0.478 3.029 0.241 1.635 7.029 7.200 10.000 4.400 1.945 0.062 5.600 -1.545 2.273 0.353 0.200 10.000 4.400 7.200 193146.892 193921.317 243832.054 157802.838 26916.799 0.607 86029.215 -0.824 1.545 5163.118 4288.531 243832.054 157802.838 200817.446 5153.928 4865.150 9437.700 2259.680 2256.890 0.392 7178.020 -1.086 4.177 239.267 221.695 9437.700 2259.680 5848.690
C12119 43.000 44.000 60 25 11.039 -0.136 35 -1.276 2.400 1.167 1.000 60 25 42.500 30473.177 30594.560 32849.740 27355.730 1711.629 -0.249 5494.010 -1.162 1.201 183.134 161.585 32849.740 27355.730 30102.735 22.122 22.146 26.186 18.467 2.808 0.046 7.719 -1.717 1.418 0.646 0.621 26.186 18.467 22.326 12.350 12.350 12.350 12.350 0.000 0.000 0.000 0.000 1.000 0.000 0.000 12.350 12.350 12.350 1.793 1.800 2.263 1.464 0.250 0.607 0.798 -0.824 1.545 0.048 0.040 2.263 1.464 1.864 1.873 1.876 3.029 -0.314 0.862 -0.904 3.344 0.582 -9.634 0.616 0.478 3.029 0.241 1.635 7.029 7.200 10.000 4.400 1.945 0.062 5.600 -1.545 2.273 0.353 0.200 10.000 4.400 7.200 139887.665 140448.546 176596.663 114289.546 19494.635 0.607 62307.117 -0.824 1.545 3739.416 3105.991 176596.663 114289.546 145443.104 4326.823 4205.440 7444.270 1950.260 1711.629 0.249 5494.010 -1.162 3.817 183.134 161.585 7444.270 1950.260 4697.265
C12120 43.000 44.000 60 25 11.039 -0.136 35 -1.276 2.400 1.167 1.000 60 25 42.500 50557.092 50619.550 56330.070 43102.830 4026.425 -0.146 13227.240 -1.126 1.307 448.893 366.360 56330.070 43102.830 49716.450 23.319 23.981 27.352 18.516 2.946 -0.082 8.836 -1.604 1.477 0.688 0.558 27.352 18.516 22.934 8.363 9.250 11.375 4.000 2.819 -0.707 7.375 -1.144 2.844 0.321 0.000 11.375 4.000 7.688 1.793 1.800 2.263 1.464 0.250 0.607 0.798 -0.824 1.545 0.048 0.040 2.263 1.464 1.864 1.873 1.876 3.029 -0.314 0.862 -0.904 3.344 0.582 -9.634 0.616 0.478 3.029 0.241 1.635 7.029 7.200 10.000 4.400 1.945 0.062 5.600 -1.545 2.273 0.353 0.200 10.000 4.400 7.200 219821.312 220702.687 277506.313 179596.091 30634.125 0.607 97910.221 -0.824 1.545 5876.167 4880.795 277506.313 179596.091 228551.202 11042.908 10980.450 18497.170 5269.930 4026.425 0.146 13227.240 -1.126 3.510 448.893 366.360 18497.170 5269.930 11883.550
C12121 48.043 48.000 60 33 7.825 -0.235 27 -0.821 1.818 1.227 1.000 60 33 46.500 130718.781 131097.870 137909.430 123379.990 4384.155 -0.073 14529.440 -1.107 1.118 660.429 586.470 137909.430 123379.990 130644.710 100.725 104.452 114.384 83.054 10.626 -0.464 31.330 -1.371 1.377 2.952 3.072 114.384 83.054 98.719 7.128 6.780 10.780 6.780 1.027 3.068 4.000 8.765 1.590 0.182 0.000 10.780 6.780 8.780 1.672 1.611 1.898 1.464 0.143 0.310 0.434 -1.489 1.296 0.045 0.040 1.898 1.464 1.681 1.848 1.876 3.029 -0.314 0.946 -0.899 3.344 0.238 -9.634 0.633 0.478 3.029 0.241 1.635 7.852 8.200 10.000 5.000 1.550 -0.307 5.000 -1.208 2.000 0.432 0.200 10.000 5.000 7.500 130880.295 126046.185 148553.873 114612.342 11206.931 0.310 33941.531 -1.489 1.296 3485.433 3114.764 148553.873 114612.342 131583.107 10957.569 10578.480 18296.360 3766.920 4384.155 0.073 14529.440 -1.107 4.857 660.429 586.470 18296.360 3766.920 11031.640
C12122 48.043 48.000 60 33 7.825 -0.235 27 -0.821 1.818 1.227 1.000 60 33 46.500 56055.161 56179.910 58371.540 53760.910 1364.415 -0.053 4610.630 -1.030 1.086 209.574 179.675 58371.540 53760.910 56066.225 89.952 93.499 102.126 75.400 8.897 -0.406 26.726 -1.392 1.354 2.581 2.733 102.126 75.400 88.763 9.135 9.005 11.000 9.005 0.456 3.713 1.995 13.989 1.222 0.091 0.000 11.000 9.005 10.003 1.672 1.611 1.898 1.464 0.143 0.310 0.434 -1.489 1.296 0.045 0.040 1.898 1.464 1.681 1.848 1.876 3.029 -0.314 0.946 -0.899 3.344 0.238 -9.634 0.633 0.478 3.029 0.241 1.635 7.852 8.200 10.000 5.000 1.550 -0.307 5.000 -1.208 2.000 0.432 0.200 10.000 5.000 7.500 62818.248 60498.033 71300.985 55010.164 5378.959 0.310 16290.822 -1.489 1.296 1672.894 1494.984 71300.985 55010.164 63155.575 3444.839 3320.090 5739.090 1128.460 1364.415 0.053 4610.630 -1.030 5.086 209.574 179.675 5739.090 1128.460 3433.775
C12123 48.043 48.000 60 33 7.825 -0.235 27 -0.821 1.818 1.227 1.000 60 33 46.500 39207.158 39218.970 40639.770 37889.320 794.931 0.073 2750.450 -0.847 1.073 125.020 105.820 40639.770 37889.320 39264.545 97.256 100.971 110.236 82.166 9.276 -0.382 28.070 -1.387 1.342 2.746 2.948 110.236 82.166 96.201 9.696 9.500 12.250 9.500 0.666 3.422 2.750 11.293 1.289 0.125 0.000 12.250 9.500 10.875 1.672 1.611 1.898 1.464 0.143 0.310 0.434 -1.489 1.296 0.045 0.040 1.898 1.464 1.681 1.848 1.876 3.029 -0.314 0.946 -0.899 3.344 0.238 -9.634 0.633 0.478 3.029 0.241 1.635 7.852 8.200 10.000 5.000 1.550 -0.307 5.000 -1.208 2.000 0.432 0.200 10.000 5.000 7.500 40627.098 39126.522 46113.227 35577.295 3478.790 0.310 10535.932 -1.489 1.296 1081.928 966.867 46113.227 35577.295 40845.261 2022.542 2010.730 3340.380 589.930 794.931 -0.073 2750.450 -0.847 5.662 125.020 105.820 3340.380 589.930 1965.155
C12124 38.857 40.000 43 33 3.891 -0.800 10 -1.028 1.303 1.667 1.000 43 33 38.000 234750.287 233922.410 237454.280 233595.940 1656.008 1.232 3858.340 -0.578 1.017 643.057 163.235 237454.280 233595.940 235525.110 106.611 107.683 111.104 97.593 5.185 -1.109 13.510 -0.057 1.138 3.398 3.421 111.104 97.593 104.349 9.696 10.000 10.625 8.000 1.192 -1.006 2.625 -1.053 1.328 0.438 0.000 10.625 8.000 9.312 1.638 1.610 1.805 1.560 0.094 1.239 0.245 0.173 1.157 0.057 0.049 1.805 1.560 1.683 1.468 1.586 3.029 -0.314 1.410 -0.287 3.344 -1.956 -9.634 0.805 0.722 3.029 0.241 1.635 8.471 9.400 10.000 5.000 2.123 -1.230 5.000 -0.571 2.000 0.967 0.350 10.000 5.000 7.500 220724.166 217000.309 243309.912 210315.082 12736.014 1.239 32994.830 0.173 1.157 7678.127 6611.096 243309.912 210315.082 226812.497 9249.713 10077.590 10404.060 6545.720 1656.008 -1.232 3858.340 -0.578 1.589 643.057 163.235 10404.060 6545.720 8474.890
C12125 48.043 48.000 60 33 7.825 -0.235 27 -0.821 1.818 1.227 1.000 60 33 46.500 56171.800 56275.820 58681.020 53815.770 1433.799 0.016 4865.250 -1.025 1.090 221.148 188.040 58681.020 53815.770 56248.395 102.160 106.110 115.941 85.540 10.156 -0.414 30.400 -1.394 1.355 2.924 3.046 115.941 85.540 100.741 8.605 8.605 8.605 8.605 0.000 0.000 0.000 0.000 1.000 0.000 0.000 8.605 8.605 8.605 1.672 1.611 1.898 1.464 0.143 0.310 0.434 -1.489 1.296 0.045 0.040 1.898 1.464 1.681 1.848 1.876 3.029 -0.314 0.946 -0.899 3.344 0.238 -9.634 0.633 0.478 3.029 0.241 1.635 7.852 8.200 10.000 5.000 1.550 -0.307 5.000 -1.208 2.000 0.432 0.200 10.000 5.000 7.500 55427.866 53380.618 62912.634 48538.380 4746.141 0.310 14374.254 -1.489 1.296 1476.083 1319.104 62912.634 48538.380 55725.507 3828.200 3724.180 6184.230 1318.980 1433.799 -0.016 4865.250 -1.025 4.689 221.148 188.040 6184.230 1318.980 3751.605
In [74]:
final_dt_1['CustomerID']=final_dt_1.index
In [75]:
final_dt_1.head(10)
Out[75]:
Current_Instalment_Sequence_mean Current_Instalment_Sequence_median Current_Instalment_Sequence_max Current_Instalment_Sequence_min Current_Instalment_Sequence_std Current_Instalment_Sequence_skew Current_Instalment_Sequence_range Current_Instalment_Sequence_kurtosis Current_Instalment_Sequence_maxtoMin Current_Instalment_Sequence_meanAD Current_Instalment_Sequence_mad Current_Instalment_Sequence_abs_max Current_Instalment_Sequence_abs_min Current_Instalment_Sequence_abs_avg Current_Outstanding_mean Current_Outstanding_median Current_Outstanding_max Current_Outstanding_min Current_Outstanding_std Current_Outstanding_skew Current_Outstanding_range Current_Outstanding_kurtosis Current_Outstanding_maxtoMin Current_Outstanding_meanAD Current_Outstanding_mad Current_Outstanding_abs_max Current_Outstanding_abs_min Current_Outstanding_abs_avg Current_Loan_to_Appraisedvalu_Percent_mean Current_Loan_to_Appraisedvalu_Percent_median Current_Loan_to_Appraisedvalu_Percent_max Current_Loan_to_Appraisedvalu_Percent_min Current_Loan_to_Appraisedvalu_Percent_std Current_Loan_to_Appraisedvalu_Percent_skew Current_Loan_to_Appraisedvalu_Percent_range Current_Loan_to_Appraisedvalu_Percent_kurtosis Current_Loan_to_Appraisedvalu_Percent_maxtoMin Current_Loan_to_Appraisedvalu_Percent_meanAD Current_Loan_to_Appraisedvalu_Percent_mad Current_Loan_to_Appraisedvalu_Percent_abs_max Current_Loan_to_Appraisedvalu_Percent_abs_min Current_Loan_to_Appraisedvalu_Percent_abs_avg CurrentInterestrate_mean CurrentInterestrate_median CurrentInterestrate_max CurrentInterestrate_min CurrentInterestrate_std CurrentInterestrate_skew CurrentInterestrate_range CurrentInterestrate_kurtosis CurrentInterestrate_maxtoMin CurrentInterestrate_meanAD CurrentInterestrate_mad CurrentInterestrate_abs_max CurrentInterestrate_abs_min CurrentInterestrate_abs_avg RealEstate_Current_Inflation_mean RealEstate_Current_Inflation_median RealEstate_Current_Inflation_max RealEstate_Current_Inflation_min RealEstate_Current_Inflation_std RealEstate_Current_Inflation_skew RealEstate_Current_Inflation_range RealEstate_Current_Inflation_kurtosis RealEstate_Current_Inflation_maxtoMin RealEstate_Current_Inflation_meanAD RealEstate_Current_Inflation_mad RealEstate_Current_Inflation_abs_max RealEstate_Current_Inflation_abs_min RealEstate_Current_Inflation_abs_avg GDP_mean GDP_median GDP_max GDP_min GDP_std GDP_skew GDP_range GDP_kurtosis GDP_maxtoMin GDP_meanAD GDP_mad GDP_abs_max GDP_abs_min GDP_abs_avg UnemploymentRate_mean UnemploymentRate_median UnemploymentRate_max UnemploymentRate_min UnemploymentRate_std UnemploymentRate_skew UnemploymentRate_range UnemploymentRate_kurtosis UnemploymentRate_maxtoMin UnemploymentRate_meanAD UnemploymentRate_mad UnemploymentRate_abs_max UnemploymentRate_abs_min UnemploymentRate_abs_avg current_Appraisal_value_mean current_Appraisal_value_median current_Appraisal_value_max current_Appraisal_value_min current_Appraisal_value_std current_Appraisal_value_skew current_Appraisal_value_range current_Appraisal_value_kurtosis current_Appraisal_value_maxtoMin current_Appraisal_value_meanAD current_Appraisal_value_mad current_Appraisal_value_abs_max current_Appraisal_value_abs_min current_Appraisal_value_abs_avg remaining_outstanding_mean remaining_outstanding_median remaining_outstanding_max remaining_outstanding_min remaining_outstanding_std remaining_outstanding_skew remaining_outstanding_range remaining_outstanding_kurtosis remaining_outstanding_maxtoMin remaining_outstanding_meanAD remaining_outstanding_mad remaining_outstanding_abs_max remaining_outstanding_abs_min remaining_outstanding_abs_avg CustomerID
CustomerID
C12116 36.500 36.500 48 25 7.763 0.000 23 -1.571 1.920 1.211 1.000 48 25 36.500 34807.537 32989.385 41303.420 29087.210 4632.101 0.327 12216.210 -1.670 1.420 642.958 380.600 41303.420 29087.210 35195.315 25.984 26.055 27.829 24.484 1.007 0.026 3.345 -0.908 1.137 0.541 0.424 27.829 24.484 26.156 9.200 9.200 9.200 9.200 0.000 0.000 0.000 0.000 1.000 0.000 0.000 9.200 9.200 9.200 1.807 1.672 2.263 1.464 0.299 0.476 0.798 -1.568 1.545 0.052 0.046 2.263 1.464 1.864 1.746 1.772 3.029 -0.314 0.935 -0.803 3.344 0.412 -9.634 0.656 0.492 3.029 0.241 1.635 7.050 7.050 10.000 4.400 2.343 0.036 5.600 -2.070 2.273 0.421 0.200 10.000 4.400 7.200 134633.839 124575.695 168596.837 109112.231 22269.436 0.476 59484.606 -1.568 1.545 3879.738 3457.021 168596.837 109112.231 138854.534 10192.463 12010.615 15912.790 3696.580 4632.101 -0.327 12216.210 -1.670 4.305 642.958 380.600 15912.790 3696.580 9804.685 C12116
C12117 29.500 29.500 34 25 3.028 0.000 9 -1.200 1.360 1.000 1.000 34 25 29.500 161086.428 160942.530 162452.330 160044.340 781.525 0.460 2407.990 -0.659 1.015 267.554 280.920 162452.330 160044.340 161248.335 62.596 59.531 73.431 57.099 6.122 0.869 16.333 -0.929 1.286 1.815 1.223 73.431 57.099 65.265 9.688 10.500 10.880 7.500 1.541 -0.898 3.380 -1.333 1.451 0.487 0.005 10.880 7.500 9.190 2.064 2.151 2.263 1.734 0.198 -0.707 0.529 -1.233 1.305 0.059 0.046 2.263 1.734 1.998 1.609 1.772 2.899 -0.314 0.927 -0.822 3.214 0.840 -9.220 0.636 0.581 2.899 0.314 1.607 4.810 4.700 5.800 4.400 0.396 1.952 1.400 4.594 1.318 0.211 0.200 5.800 4.400 5.100 259530.977 270380.129 284512.065 217951.153 24948.281 -0.707 66560.912 -1.233 1.305 7395.657 5833.824 284512.065 217951.153 251231.609 3413.572 3557.470 4455.660 2047.670 781.525 -0.460 2407.990 -0.659 2.176 267.554 280.920 4455.660 2047.670 3251.665 C12117
C12118 43.000 44.000 60 25 11.039 -0.136 35 -1.276 2.400 1.167 1.000 60 25 42.500 38846.072 39134.850 41740.320 34562.300 2256.890 -0.392 7178.020 -1.086 1.208 239.267 221.695 41740.320 34562.300 38151.310 20.430 20.484 24.220 16.898 2.658 0.039 7.322 -1.719 1.433 0.609 0.591 24.220 16.898 20.559 12.960 12.125 15.000 12.125 1.224 0.903 2.875 -1.129 1.237 0.133 0.000 15.000 12.125 13.562 1.793 1.800 2.263 1.464 0.250 0.607 0.798 -0.824 1.545 0.048 0.040 2.263 1.464 1.864 1.873 1.876 3.029 -0.314 0.862 -0.904 3.344 0.582 -9.634 0.616 0.478 3.029 0.241 1.635 7.029 7.200 10.000 4.400 1.945 0.062 5.600 -1.545 2.273 0.353 0.200 10.000 4.400 7.200 193146.892 193921.317 243832.054 157802.838 26916.799 0.607 86029.215 -0.824 1.545 5163.118 4288.531 243832.054 157802.838 200817.446 5153.928 4865.150 9437.700 2259.680 2256.890 0.392 7178.020 -1.086 4.177 239.267 221.695 9437.700 2259.680 5848.690 C12118
C12119 43.000 44.000 60 25 11.039 -0.136 35 -1.276 2.400 1.167 1.000 60 25 42.500 30473.177 30594.560 32849.740 27355.730 1711.629 -0.249 5494.010 -1.162 1.201 183.134 161.585 32849.740 27355.730 30102.735 22.122 22.146 26.186 18.467 2.808 0.046 7.719 -1.717 1.418 0.646 0.621 26.186 18.467 22.326 12.350 12.350 12.350 12.350 0.000 0.000 0.000 0.000 1.000 0.000 0.000 12.350 12.350 12.350 1.793 1.800 2.263 1.464 0.250 0.607 0.798 -0.824 1.545 0.048 0.040 2.263 1.464 1.864 1.873 1.876 3.029 -0.314 0.862 -0.904 3.344 0.582 -9.634 0.616 0.478 3.029 0.241 1.635 7.029 7.200 10.000 4.400 1.945 0.062 5.600 -1.545 2.273 0.353 0.200 10.000 4.400 7.200 139887.665 140448.546 176596.663 114289.546 19494.635 0.607 62307.117 -0.824 1.545 3739.416 3105.991 176596.663 114289.546 145443.104 4326.823 4205.440 7444.270 1950.260 1711.629 0.249 5494.010 -1.162 3.817 183.134 161.585 7444.270 1950.260 4697.265 C12119
C12120 43.000 44.000 60 25 11.039 -0.136 35 -1.276 2.400 1.167 1.000 60 25 42.500 50557.092 50619.550 56330.070 43102.830 4026.425 -0.146 13227.240 -1.126 1.307 448.893 366.360 56330.070 43102.830 49716.450 23.319 23.981 27.352 18.516 2.946 -0.082 8.836 -1.604 1.477 0.688 0.558 27.352 18.516 22.934 8.363 9.250 11.375 4.000 2.819 -0.707 7.375 -1.144 2.844 0.321 0.000 11.375 4.000 7.688 1.793 1.800 2.263 1.464 0.250 0.607 0.798 -0.824 1.545 0.048 0.040 2.263 1.464 1.864 1.873 1.876 3.029 -0.314 0.862 -0.904 3.344 0.582 -9.634 0.616 0.478 3.029 0.241 1.635 7.029 7.200 10.000 4.400 1.945 0.062 5.600 -1.545 2.273 0.353 0.200 10.000 4.400 7.200 219821.312 220702.687 277506.313 179596.091 30634.125 0.607 97910.221 -0.824 1.545 5876.167 4880.795 277506.313 179596.091 228551.202 11042.908 10980.450 18497.170 5269.930 4026.425 0.146 13227.240 -1.126 3.510 448.893 366.360 18497.170 5269.930 11883.550 C12120
C12121 48.043 48.000 60 33 7.825 -0.235 27 -0.821 1.818 1.227 1.000 60 33 46.500 130718.781 131097.870 137909.430 123379.990 4384.155 -0.073 14529.440 -1.107 1.118 660.429 586.470 137909.430 123379.990 130644.710 100.725 104.452 114.384 83.054 10.626 -0.464 31.330 -1.371 1.377 2.952 3.072 114.384 83.054 98.719 7.128 6.780 10.780 6.780 1.027 3.068 4.000 8.765 1.590 0.182 0.000 10.780 6.780 8.780 1.672 1.611 1.898 1.464 0.143 0.310 0.434 -1.489 1.296 0.045 0.040 1.898 1.464 1.681 1.848 1.876 3.029 -0.314 0.946 -0.899 3.344 0.238 -9.634 0.633 0.478 3.029 0.241 1.635 7.852 8.200 10.000 5.000 1.550 -0.307 5.000 -1.208 2.000 0.432 0.200 10.000 5.000 7.500 130880.295 126046.185 148553.873 114612.342 11206.931 0.310 33941.531 -1.489 1.296 3485.433 3114.764 148553.873 114612.342 131583.107 10957.569 10578.480 18296.360 3766.920 4384.155 0.073 14529.440 -1.107 4.857 660.429 586.470 18296.360 3766.920 11031.640 C12121
C12122 48.043 48.000 60 33 7.825 -0.235 27 -0.821 1.818 1.227 1.000 60 33 46.500 56055.161 56179.910 58371.540 53760.910 1364.415 -0.053 4610.630 -1.030 1.086 209.574 179.675 58371.540 53760.910 56066.225 89.952 93.499 102.126 75.400 8.897 -0.406 26.726 -1.392 1.354 2.581 2.733 102.126 75.400 88.763 9.135 9.005 11.000 9.005 0.456 3.713 1.995 13.989 1.222 0.091 0.000 11.000 9.005 10.003 1.672 1.611 1.898 1.464 0.143 0.310 0.434 -1.489 1.296 0.045 0.040 1.898 1.464 1.681 1.848 1.876 3.029 -0.314 0.946 -0.899 3.344 0.238 -9.634 0.633 0.478 3.029 0.241 1.635 7.852 8.200 10.000 5.000 1.550 -0.307 5.000 -1.208 2.000 0.432 0.200 10.000 5.000 7.500 62818.248 60498.033 71300.985 55010.164 5378.959 0.310 16290.822 -1.489 1.296 1672.894 1494.984 71300.985 55010.164 63155.575 3444.839 3320.090 5739.090 1128.460 1364.415 0.053 4610.630 -1.030 5.086 209.574 179.675 5739.090 1128.460 3433.775 C12122
C12123 48.043 48.000 60 33 7.825 -0.235 27 -0.821 1.818 1.227 1.000 60 33 46.500 39207.158 39218.970 40639.770 37889.320 794.931 0.073 2750.450 -0.847 1.073 125.020 105.820 40639.770 37889.320 39264.545 97.256 100.971 110.236 82.166 9.276 -0.382 28.070 -1.387 1.342 2.746 2.948 110.236 82.166 96.201 9.696 9.500 12.250 9.500 0.666 3.422 2.750 11.293 1.289 0.125 0.000 12.250 9.500 10.875 1.672 1.611 1.898 1.464 0.143 0.310 0.434 -1.489 1.296 0.045 0.040 1.898 1.464 1.681 1.848 1.876 3.029 -0.314 0.946 -0.899 3.344 0.238 -9.634 0.633 0.478 3.029 0.241 1.635 7.852 8.200 10.000 5.000 1.550 -0.307 5.000 -1.208 2.000 0.432 0.200 10.000 5.000 7.500 40627.098 39126.522 46113.227 35577.295 3478.790 0.310 10535.932 -1.489 1.296 1081.928 966.867 46113.227 35577.295 40845.261 2022.542 2010.730 3340.380 589.930 794.931 -0.073 2750.450 -0.847 5.662 125.020 105.820 3340.380 589.930 1965.155 C12123
C12124 38.857 40.000 43 33 3.891 -0.800 10 -1.028 1.303 1.667 1.000 43 33 38.000 234750.287 233922.410 237454.280 233595.940 1656.008 1.232 3858.340 -0.578 1.017 643.057 163.235 237454.280 233595.940 235525.110 106.611 107.683 111.104 97.593 5.185 -1.109 13.510 -0.057 1.138 3.398 3.421 111.104 97.593 104.349 9.696 10.000 10.625 8.000 1.192 -1.006 2.625 -1.053 1.328 0.438 0.000 10.625 8.000 9.312 1.638 1.610 1.805 1.560 0.094 1.239 0.245 0.173 1.157 0.057 0.049 1.805 1.560 1.683 1.468 1.586 3.029 -0.314 1.410 -0.287 3.344 -1.956 -9.634 0.805 0.722 3.029 0.241 1.635 8.471 9.400 10.000 5.000 2.123 -1.230 5.000 -0.571 2.000 0.967 0.350 10.000 5.000 7.500 220724.166 217000.309 243309.912 210315.082 12736.014 1.239 32994.830 0.173 1.157 7678.127 6611.096 243309.912 210315.082 226812.497 9249.713 10077.590 10404.060 6545.720 1656.008 -1.232 3858.340 -0.578 1.589 643.057 163.235 10404.060 6545.720 8474.890 C12124
C12125 48.043 48.000 60 33 7.825 -0.235 27 -0.821 1.818 1.227 1.000 60 33 46.500 56171.800 56275.820 58681.020 53815.770 1433.799 0.016 4865.250 -1.025 1.090 221.148 188.040 58681.020 53815.770 56248.395 102.160 106.110 115.941 85.540 10.156 -0.414 30.400 -1.394 1.355 2.924 3.046 115.941 85.540 100.741 8.605 8.605 8.605 8.605 0.000 0.000 0.000 0.000 1.000 0.000 0.000 8.605 8.605 8.605 1.672 1.611 1.898 1.464 0.143 0.310 0.434 -1.489 1.296 0.045 0.040 1.898 1.464 1.681 1.848 1.876 3.029 -0.314 0.946 -0.899 3.344 0.238 -9.634 0.633 0.478 3.029 0.241 1.635 7.852 8.200 10.000 5.000 1.550 -0.307 5.000 -1.208 2.000 0.432 0.200 10.000 5.000 7.500 55427.866 53380.618 62912.634 48538.380 4746.141 0.310 14374.254 -1.489 1.296 1476.083 1319.104 62912.634 48538.380 55725.507 3828.200 3724.180 6184.230 1318.980 1433.799 -0.016 4865.250 -1.025 4.689 221.148 188.040 6184.230 1318.980 3751.605 C12125
In [76]:
final_dt_1.shape
Out[76]:
(11089, 127)
In [77]:
final_dt_1.describe(include='all')
Out[77]:
Current_Instalment_Sequence_mean Current_Instalment_Sequence_median Current_Instalment_Sequence_max Current_Instalment_Sequence_min Current_Instalment_Sequence_std Current_Instalment_Sequence_skew Current_Instalment_Sequence_range Current_Instalment_Sequence_kurtosis Current_Instalment_Sequence_maxtoMin Current_Instalment_Sequence_meanAD Current_Instalment_Sequence_mad Current_Instalment_Sequence_abs_max Current_Instalment_Sequence_abs_min Current_Instalment_Sequence_abs_avg Current_Outstanding_mean Current_Outstanding_median Current_Outstanding_max Current_Outstanding_min Current_Outstanding_std Current_Outstanding_skew Current_Outstanding_range Current_Outstanding_kurtosis Current_Outstanding_maxtoMin Current_Outstanding_meanAD Current_Outstanding_mad Current_Outstanding_abs_max Current_Outstanding_abs_min Current_Outstanding_abs_avg Current_Loan_to_Appraisedvalu_Percent_mean Current_Loan_to_Appraisedvalu_Percent_median Current_Loan_to_Appraisedvalu_Percent_max Current_Loan_to_Appraisedvalu_Percent_min Current_Loan_to_Appraisedvalu_Percent_std Current_Loan_to_Appraisedvalu_Percent_skew Current_Loan_to_Appraisedvalu_Percent_range Current_Loan_to_Appraisedvalu_Percent_kurtosis Current_Loan_to_Appraisedvalu_Percent_maxtoMin Current_Loan_to_Appraisedvalu_Percent_meanAD Current_Loan_to_Appraisedvalu_Percent_mad Current_Loan_to_Appraisedvalu_Percent_abs_max Current_Loan_to_Appraisedvalu_Percent_abs_min Current_Loan_to_Appraisedvalu_Percent_abs_avg CurrentInterestrate_mean CurrentInterestrate_median CurrentInterestrate_max CurrentInterestrate_min CurrentInterestrate_std CurrentInterestrate_skew CurrentInterestrate_range CurrentInterestrate_kurtosis CurrentInterestrate_maxtoMin CurrentInterestrate_meanAD CurrentInterestrate_mad CurrentInterestrate_abs_max CurrentInterestrate_abs_min CurrentInterestrate_abs_avg RealEstate_Current_Inflation_mean RealEstate_Current_Inflation_median RealEstate_Current_Inflation_max RealEstate_Current_Inflation_min RealEstate_Current_Inflation_std RealEstate_Current_Inflation_skew RealEstate_Current_Inflation_range RealEstate_Current_Inflation_kurtosis RealEstate_Current_Inflation_maxtoMin RealEstate_Current_Inflation_meanAD RealEstate_Current_Inflation_mad RealEstate_Current_Inflation_abs_max RealEstate_Current_Inflation_abs_min RealEstate_Current_Inflation_abs_avg GDP_mean GDP_median GDP_max GDP_min GDP_std GDP_skew GDP_range GDP_kurtosis GDP_maxtoMin GDP_meanAD GDP_mad GDP_abs_max GDP_abs_min GDP_abs_avg UnemploymentRate_mean UnemploymentRate_median UnemploymentRate_max UnemploymentRate_min UnemploymentRate_std UnemploymentRate_skew UnemploymentRate_range UnemploymentRate_kurtosis UnemploymentRate_maxtoMin UnemploymentRate_meanAD UnemploymentRate_mad UnemploymentRate_abs_max UnemploymentRate_abs_min UnemploymentRate_abs_avg current_Appraisal_value_mean current_Appraisal_value_median current_Appraisal_value_max current_Appraisal_value_min current_Appraisal_value_std current_Appraisal_value_skew current_Appraisal_value_range current_Appraisal_value_kurtosis current_Appraisal_value_maxtoMin current_Appraisal_value_meanAD current_Appraisal_value_mad current_Appraisal_value_abs_max current_Appraisal_value_abs_min current_Appraisal_value_abs_avg remaining_outstanding_mean remaining_outstanding_median remaining_outstanding_max remaining_outstanding_min remaining_outstanding_std remaining_outstanding_skew remaining_outstanding_range remaining_outstanding_kurtosis remaining_outstanding_maxtoMin remaining_outstanding_meanAD remaining_outstanding_mad remaining_outstanding_abs_max remaining_outstanding_abs_min remaining_outstanding_abs_avg CustomerID
count 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 10975.000 10975.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089.000 11075.000 11089.000 11089.000 11089.000 11089.000 11089.000 11089
unique nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 11089
top nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan C13200
freq nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 1
mean 35.497 35.869 46.806 23.823 7.350 -0.012 22.983 -1.167 2.093 1.159 1.000 46.806 23.823 35.315 248289.631 249362.280 261101.260 229811.996 10136.210 -0.249 31289.264 -0.457 inf 1731.440 1086.979 261101.260 229811.996 245456.628 80.559 80.852 94.920 65.917 9.738 0.080 29.003 -0.782 inf 2.678 2.200 94.920 65.917 80.419 6.813 6.723 7.555 6.251 0.530 0.162 1.304 0.240 1.373 0.095 0.011 7.555 6.251 6.903 1.858 1.844 2.207 1.550 0.220 0.143 0.658 -0.787 1.434 0.055 0.045 2.207 1.550 1.879 1.883 1.969 3.197 -0.118 0.937 -0.663 3.315 0.089 -7.807 0.579 0.457 3.197 0.369 1.783 6.419 6.291 8.836 4.468 1.574 0.420 4.367 -0.293 1.978 0.364 0.190 8.836 4.468 6.652 328590.128 325950.137 389975.276 273914.343 39081.734 0.142 116060.933 -0.787 1.434 9789.261 8058.864 389975.276 273914.343 331944.809 12207.582 11134.932 30685.216 -604.048 10136.210 0.249 31289.264 -0.457 inf 1731.440 1086.979 33469.753 2804.857 18137.305 NaN
std 8.139 8.702 12.075 5.676 3.278 0.299 10.843 0.526 0.842 0.150 0.009 12.075 5.676 7.721 216230.171 217594.451 227213.225 205515.458 20436.895 0.844 62382.892 3.124 nan 4004.533 1804.103 227213.225 205515.458 214378.918 22.241 23.450 25.113 20.159 4.304 0.751 14.305 1.543 nan 1.441 0.987 25.113 20.159 21.619 1.804 1.936 2.043 2.208 0.795 0.923 2.015 3.029 0.826 0.165 0.055 2.043 2.208 1.873 0.162 0.215 0.113 0.152 0.054 0.725 0.150 0.903 0.124 0.011 0.011 0.113 0.152 0.111 0.398 0.425 0.454 0.530 0.125 0.256 0.533 0.585 6.021 0.082 0.081 0.454 0.360 0.339 1.056 1.421 1.922 0.240 0.757 0.874 1.893 2.534 0.424 0.167 0.031 1.922 0.240 0.990 310459.831 309523.280 367163.871 258358.514 39508.474 0.725 115141.608 0.903 0.124 9656.120 8128.923 367163.871 258358.514 312193.557 38414.072 40727.176 68362.861 27317.105 20436.895 0.844 62382.892 3.124 nan 4004.533 1804.103 68104.110 23927.064 40835.668 NaN
min 6.500 6.500 10.000 3.000 1.871 -1.301 5.000 -2.758 1.154 1.000 1.000 10.000 3.000 6.500 3737.721 3639.095 5284.810 0.000 0.000 -6.067 0.000 -3.089 1.000 0.000 0.000 5284.810 0.000 3636.495 4.615 0.456 7.945 0.000 0.759 -4.021 2.530 -2.476 1.052 0.340 0.072 7.945 0.000 4.311 2.000 0.250 2.000 0.250 0.000 -6.164 0.000 -3.333 1.000 0.000 0.000 2.000 0.250 2.000 1.243 1.233 1.386 1.136 0.033 -2.258 0.101 -2.139 1.067 0.031 0.018 1.386 1.136 1.261 0.804 0.971 1.851 -0.314 0.447 -1.545 1.333 -1.996 -14.752 0.272 0.209 1.851 0.205 1.046 4.700 4.700 5.000 3.900 0.189 -2.554 0.600 -3.192 1.099 0.100 0.100 5.000 3.900 4.700 18353.861 17816.055 22718.739 14703.077 1049.459 -2.258 3402.479 -2.615 1.067 502.582 340.499 22718.739 14703.077 18710.908 -158248.953 -178587.460 -27622.060 -245697.840 0.000 -5.981 0.000 -3.089 -186243.386 0.000 0.000 0.000 0.000 0.000 NaN
25% 29.500 29.500 34.000 21.000 4.183 -0.195 13.000 -1.365 1.550 1.000 1.000 34.000 21.000 30.000 97308.834 97641.690 102400.000 88499.220 1519.415 -0.437 4528.180 -1.336 1.031 306.366 208.580 102400.000 88499.220 96261.760 65.111 64.166 76.512 54.589 6.947 -0.316 20.224 -1.578 1.327 1.870 1.452 76.512 54.589 65.383 5.625 5.750 6.125 5.000 0.000 0.000 0.000 -0.403 1.000 0.000 0.000 6.125 5.000 5.627 1.748 1.672 2.197 1.464 0.188 -0.419 0.529 -1.342 1.305 0.048 0.040 2.197 1.464 1.844 1.664 1.717 3.029 -0.314 0.868 -0.806 3.290 -0.160 -9.926 0.539 0.424 3.029 0.241 1.635 5.239 5.000 6.200 4.400 0.639 -0.139 2.000 -1.528 1.513 0.237 0.200 6.200 4.400 5.200 126425.371 124820.841 150481.046 105226.600 14265.564 -0.419 43086.919 -1.342 1.305 3608.154 2923.116 150481.046 105226.600 127879.046 1640.067 1271.060 3506.580 23.530 1519.415 -0.051 4528.180 -1.336 4.941 306.366 208.580 5105.930 163.170 2820.180 NaN
50% 36.500 36.500 47.000 25.000 7.177 0.000 22.000 -1.200 1.935 1.138 1.000 47.000 25.000 36.500 176842.796 176699.490 184192.850 163677.930 4022.133 -0.108 12338.520 -1.192 1.078 659.086 488.080 184192.850 163677.930 174973.900 82.719 81.626 97.453 69.757 9.624 -0.012 28.140 -1.279 1.411 2.467 2.023 97.453 69.757 83.436 6.625 6.625 7.125 6.450 0.000 0.000 0.000 0.000 1.000 0.000 0.000 7.125 6.450 6.625 1.827 1.801 2.263 1.464 0.228 0.345 0.701 -0.973 1.453 0.052 0.046 2.263 1.464 1.864 1.818 1.851 3.029 -0.314 0.921 -0.730 3.344 0.230 -9.634 0.590 0.463 3.029 0.241 1.635 6.803 6.200 10.000 4.400 1.889 0.196 5.500 -1.354 2.222 0.355 0.200 10.000 4.400 7.200 222010.210 219653.761 263486.895 185939.665 25689.286 0.345 76765.526 -0.973 1.453 6521.896 5326.042 263486.895 185939.665 223983.127 4858.470 4098.090 10395.620 404.270 4022.133 0.108 12338.520 -1.192 13.978 659.086 488.080 13430.170 540.930 7279.220 NaN
75% 42.167 43.500 60.000 28.000 10.248 0.000 32.000 -1.161 2.400 1.211 1.000 60.000 28.000 41.500 350471.542 351633.625 370003.470 324300.000 10274.066 0.051 31749.780 -0.849 1.159 1722.222 1324.520 370003.470 324300.000 345947.005 97.582 98.853 113.658 80.116 11.928 0.560 35.121 -0.507 1.482 3.146 2.921 113.658 80.116 96.881 7.875 7.850 8.600 7.614 1.089 0.032 2.750 0.000 1.520 0.159 0.000 8.600 7.614 8.000 1.986 2.067 2.263 1.582 0.263 0.659 0.798 -0.404 1.545 0.059 0.049 2.263 1.582 1.913 2.044 2.213 3.347 -0.314 1.010 -0.590 3.593 0.404 -9.220 0.620 0.478 3.347 0.241 1.899 7.210 7.600 10.000 4.500 2.076 0.660 5.600 -0.406 2.273 0.414 0.200 10.000 4.500 7.250 443271.635 435767.573 530792.028 371629.321 50983.526 0.659 154632.080 -0.404 1.545 13161.984 10726.257 530792.028 371629.321 451373.576 13235.743 11639.520 29391.940 1398.680 10274.066 0.437 31749.780 -0.849 43.739 1722.222 1324.520 34022.530 1725.160 18472.810 NaN
max 55.500 55.500 60.000 51.000 15.476 1.627 50.000 3.051 13.000 2.333 2.000 60.000 51.000 55.500 2999341.627 3000000.000 3000000.000 2994074.640 480645.278 5.981 1094989.380 36.870 inf 99200.000 63298.320 3000000.000 2994074.640 2997037.320 141.676 154.876 169.358 114.128 56.736 3.247 169.134 20.512 inf 27.129 7.251 169.358 114.128 133.756 18.000 18.000 37.500 18.000 9.057 6.325 32.625 40.000 50.000 3.068 1.000 37.500 18.000 22.062 2.195 2.208 2.263 2.089 0.410 1.717 1.127 5.802 1.993 0.118 0.121 2.263 2.089 2.176 3.548 3.359 4.320 2.987 1.426 1.125 4.635 2.242 21.089 0.932 1.097 4.320 2.987 3.654 9.543 9.600 10.000 9.100 2.703 3.644 6.100 14.477 2.564 1.160 0.500 10.000 9.100 9.550 4811459.042 4906684.081 5927579.747 4072158.508 674721.539 1.717 1855421.238 5.802 1.993 196315.044 156671.371 5927579.747 4072158.508 4999869.128 2125664.060 2125664.060 2251328.120 2000000.000 480645.278 6.067 1094989.380 36.870 inf 99200.000 63298.320 2251328.120 2000000.000 2125664.060 NaN


Join Two Column Merge dt 2 And final dt 1

In [78]:
train_data_1=final_dt_1.merge(merge_dt_2, left_on="CustomerID", right_on='CustomerID')
In [79]:
train_data_1.fillna(train_data_1['remaining_outstanding_maxtoMin'].median(),inplace=True)
In [80]:
train_data_1.isna().sum()
Out[80]:
Current_Instalment_Sequence_mean                  0
Current_Instalment_Sequence_median                0
Current_Instalment_Sequence_max                   0
Current_Instalment_Sequence_min                   0
Current_Instalment_Sequence_std                   0
Current_Instalment_Sequence_skew                  0
Current_Instalment_Sequence_range                 0
Current_Instalment_Sequence_kurtosis              0
Current_Instalment_Sequence_maxtoMin              0
Current_Instalment_Sequence_meanAD                0
Current_Instalment_Sequence_mad                   0
Current_Instalment_Sequence_abs_max               0
Current_Instalment_Sequence_abs_min               0
Current_Instalment_Sequence_abs_avg               0
Current_Outstanding_mean                          0
Current_Outstanding_median                        0
Current_Outstanding_max                           0
Current_Outstanding_min                           0
Current_Outstanding_std                           0
Current_Outstanding_skew                          0
Current_Outstanding_range                         0
Current_Outstanding_kurtosis                      0
Current_Outstanding_maxtoMin                      0
Current_Outstanding_meanAD                        0
Current_Outstanding_mad                           0
Current_Outstanding_abs_max                       0
Current_Outstanding_abs_min                       0
Current_Outstanding_abs_avg                       0
Current_Loan_to_Appraisedvalu_Percent_mean        0
Current_Loan_to_Appraisedvalu_Percent_median      0
Current_Loan_to_Appraisedvalu_Percent_max         0
Current_Loan_to_Appraisedvalu_Percent_min         0
Current_Loan_to_Appraisedvalu_Percent_std         0
Current_Loan_to_Appraisedvalu_Percent_skew        0
Current_Loan_to_Appraisedvalu_Percent_range       0
Current_Loan_to_Appraisedvalu_Percent_kurtosis    0
Current_Loan_to_Appraisedvalu_Percent_maxtoMin    0
Current_Loan_to_Appraisedvalu_Percent_meanAD      0
Current_Loan_to_Appraisedvalu_Percent_mad         0
Current_Loan_to_Appraisedvalu_Percent_abs_max     0
Current_Loan_to_Appraisedvalu_Percent_abs_min     0
Current_Loan_to_Appraisedvalu_Percent_abs_avg     0
CurrentInterestrate_mean                          0
CurrentInterestrate_median                        0
CurrentInterestrate_max                           0
CurrentInterestrate_min                           0
CurrentInterestrate_std                           0
CurrentInterestrate_skew                          0
CurrentInterestrate_range                         0
CurrentInterestrate_kurtosis                      0
CurrentInterestrate_maxtoMin                      0
CurrentInterestrate_meanAD                        0
CurrentInterestrate_mad                           0
CurrentInterestrate_abs_max                       0
CurrentInterestrate_abs_min                       0
CurrentInterestrate_abs_avg                       0
RealEstate_Current_Inflation_mean                 0
RealEstate_Current_Inflation_median               0
RealEstate_Current_Inflation_max                  0
RealEstate_Current_Inflation_min                  0
RealEstate_Current_Inflation_std                  0
RealEstate_Current_Inflation_skew                 0
RealEstate_Current_Inflation_range                0
RealEstate_Current_Inflation_kurtosis             0
RealEstate_Current_Inflation_maxtoMin             0
RealEstate_Current_Inflation_meanAD               0
RealEstate_Current_Inflation_mad                  0
RealEstate_Current_Inflation_abs_max              0
RealEstate_Current_Inflation_abs_min              0
RealEstate_Current_Inflation_abs_avg              0
GDP_mean                                          0
GDP_median                                        0
GDP_max                                           0
GDP_min                                           0
GDP_std                                           0
GDP_skew                                          0
GDP_range                                         0
GDP_kurtosis                                      0
GDP_maxtoMin                                      0
GDP_meanAD                                        0
GDP_mad                                           0
GDP_abs_max                                       0
GDP_abs_min                                       0
GDP_abs_avg                                       0
UnemploymentRate_mean                             0
UnemploymentRate_median                           0
UnemploymentRate_max                              0
UnemploymentRate_min                              0
UnemploymentRate_std                              0
UnemploymentRate_skew                             0
UnemploymentRate_range                            0
UnemploymentRate_kurtosis                         0
UnemploymentRate_maxtoMin                         0
UnemploymentRate_meanAD                           0
UnemploymentRate_mad                              0
UnemploymentRate_abs_max                          0
UnemploymentRate_abs_min                          0
UnemploymentRate_abs_avg                          0
current_Appraisal_value_mean                      0
current_Appraisal_value_median                    0
current_Appraisal_value_max                       0
current_Appraisal_value_min                       0
current_Appraisal_value_std                       0
current_Appraisal_value_skew                      0
current_Appraisal_value_range                     0
current_Appraisal_value_kurtosis                  0
current_Appraisal_value_maxtoMin                  0
current_Appraisal_value_meanAD                    0
current_Appraisal_value_mad                       0
current_Appraisal_value_abs_max                   0
current_Appraisal_value_abs_min                   0
current_Appraisal_value_abs_avg                   0
remaining_outstanding_mean                        0
remaining_outstanding_median                      0
remaining_outstanding_max                         0
remaining_outstanding_min                         0
remaining_outstanding_std                         0
remaining_outstanding_skew                        0
remaining_outstanding_range                       0
remaining_outstanding_kurtosis                    0
remaining_outstanding_maxtoMin                    0
remaining_outstanding_meanAD                      0
remaining_outstanding_mad                         0
remaining_outstanding_abs_max                     0
remaining_outstanding_abs_min                     0
remaining_outstanding_abs_avg                     0
CustomerID                                        0
Starting_Instalment                               0
Maturity_Period                                   0
Asset_type                                        0
Urban_Development                                 0
Villa_House                                       0
Investment_SelfOccupied                           0
Starting_outstanding                              0
Starting_Loan_to_Appraisedvalu_Percent            0
StartingInterestrate                              0
RealEstate_Starting_Inflation                     0
age                                               0
Salary                                            0
ProfessionalLicensure                             0
UtilitySpending                                   0
eCommerceAccount                                  0
SocialMediaAccount                                0
Appraisal_value                                   0
NoOfProperties                                    0
CreditRiskScore                                   0
dtype: int64
In [81]:
train_data_1[['eCommerceAccount','ProfessionalLicensure']]=train_data_1[['eCommerceAccount','ProfessionalLicensure']].astype('object')
In [82]:
#num1_cols = list(train_data_1.select_dtypes(include=['float64','float32','int32','int64']).columns)
#cat1_cols = list(train_data_1.select_dtypes(include=['object']).columns)
categorical_features =train_data_1.select_dtypes(include=['object']).columns
numerical_features =train_data_1.select_dtypes(include=['float64','float32','int32','int64']).columns


Feature Engg On Payment Status Column

In [83]:
new=merge_dt[['CustomerID','Current_Instalment_Sequence', 'Current_Outstanding','Current_Loan_to_Appraisedvalu_Percent','CurrentInterestrate',
                    'RealEstate_Current_Inflation','GDP', 'UnemploymentRate','current_Appraisal_value','remaining_outstanding','Payment_Status']]
In [84]:
new.Payment_Status.unique()
Out[84]:
array(['Non-Payoff/Non-Default', 'Default', 'Payoff'], dtype=object)
In [85]:
payment={'Non-Payoff/Non-Default': 2,'Payoff':4,'Default':0}

new['Payment_Status']=new['Payment_Status'].map(payment)
create payment_total_score
In [86]:
new['payment_total_score'] =[4]*new.shape[0]
create payment dataframe
In [87]:
payment_data=pd.DataFrame({'CustomerID':new.CustomerID,'Payment_Status':new.Payment_Status,'payment_total_score':new.payment_total_score})
define function to find z score of payment status
In [88]:
def payment_eng1(data):
    df = pd.DataFrame()
    for col in data.columns:
        if col in ['CustomerID']:
            continue
        df[col + '_count'] = data.groupby(['CustomerID'])[col].count()
        df[col + '_sum'] = data.groupby(['CustomerID'])[col].sum()
    return df
In [89]:
payment_data=payment_eng1(data=payment_data)
In [90]:
payment_data.head(5)
Out[90]:
Payment_Status_count Payment_Status_sum payment_total_score_count payment_total_score_sum
CustomerID
C12116 20 38 20 80
C12117 10 18 10 40
C12118 31 62 31 124
C12119 31 62 31 124
C12120 31 62 31 124
In [91]:
from scipy.stats import zscore
payment_percentile= payment_data.Payment_Status_sum/payment_data.payment_total_score_sum
payment_data['payment_z_score']=zscore(payment_percentile)
payment_data['payment_lenght']=payment_data.Payment_Status_count
In [92]:
print('size of train data',train_data_1.shape)
print('size of payment col',len(payment_percentile))
size of train data (11089, 146)
size of payment col 11089
In [94]:
payment_data2=payment_data[['payment_z_score','payment_lenght']]
In [95]:
train_data_1=train_data_1.merge(payment_data2, left_on="CustomerID", right_on='CustomerID')
In [96]:
train_data_1.head(5)
Out[96]:
Current_Instalment_Sequence_mean Current_Instalment_Sequence_median Current_Instalment_Sequence_max Current_Instalment_Sequence_min Current_Instalment_Sequence_std Current_Instalment_Sequence_skew Current_Instalment_Sequence_range Current_Instalment_Sequence_kurtosis Current_Instalment_Sequence_maxtoMin Current_Instalment_Sequence_meanAD Current_Instalment_Sequence_mad Current_Instalment_Sequence_abs_max Current_Instalment_Sequence_abs_min Current_Instalment_Sequence_abs_avg Current_Outstanding_mean Current_Outstanding_median Current_Outstanding_max Current_Outstanding_min Current_Outstanding_std Current_Outstanding_skew Current_Outstanding_range Current_Outstanding_kurtosis Current_Outstanding_maxtoMin Current_Outstanding_meanAD Current_Outstanding_mad Current_Outstanding_abs_max Current_Outstanding_abs_min Current_Outstanding_abs_avg Current_Loan_to_Appraisedvalu_Percent_mean Current_Loan_to_Appraisedvalu_Percent_median Current_Loan_to_Appraisedvalu_Percent_max Current_Loan_to_Appraisedvalu_Percent_min Current_Loan_to_Appraisedvalu_Percent_std Current_Loan_to_Appraisedvalu_Percent_skew Current_Loan_to_Appraisedvalu_Percent_range Current_Loan_to_Appraisedvalu_Percent_kurtosis Current_Loan_to_Appraisedvalu_Percent_maxtoMin Current_Loan_to_Appraisedvalu_Percent_meanAD Current_Loan_to_Appraisedvalu_Percent_mad Current_Loan_to_Appraisedvalu_Percent_abs_max Current_Loan_to_Appraisedvalu_Percent_abs_min Current_Loan_to_Appraisedvalu_Percent_abs_avg CurrentInterestrate_mean CurrentInterestrate_median CurrentInterestrate_max CurrentInterestrate_min CurrentInterestrate_std CurrentInterestrate_skew CurrentInterestrate_range CurrentInterestrate_kurtosis CurrentInterestrate_maxtoMin CurrentInterestrate_meanAD CurrentInterestrate_mad CurrentInterestrate_abs_max CurrentInterestrate_abs_min CurrentInterestrate_abs_avg RealEstate_Current_Inflation_mean RealEstate_Current_Inflation_median RealEstate_Current_Inflation_max RealEstate_Current_Inflation_min RealEstate_Current_Inflation_std RealEstate_Current_Inflation_skew RealEstate_Current_Inflation_range RealEstate_Current_Inflation_kurtosis RealEstate_Current_Inflation_maxtoMin RealEstate_Current_Inflation_meanAD RealEstate_Current_Inflation_mad RealEstate_Current_Inflation_abs_max RealEstate_Current_Inflation_abs_min RealEstate_Current_Inflation_abs_avg GDP_mean GDP_median GDP_max GDP_min GDP_std GDP_skew GDP_range GDP_kurtosis GDP_maxtoMin GDP_meanAD GDP_mad GDP_abs_max GDP_abs_min GDP_abs_avg UnemploymentRate_mean UnemploymentRate_median UnemploymentRate_max UnemploymentRate_min UnemploymentRate_std UnemploymentRate_skew UnemploymentRate_range UnemploymentRate_kurtosis UnemploymentRate_maxtoMin UnemploymentRate_meanAD UnemploymentRate_mad UnemploymentRate_abs_max UnemploymentRate_abs_min UnemploymentRate_abs_avg current_Appraisal_value_mean current_Appraisal_value_median current_Appraisal_value_max current_Appraisal_value_min current_Appraisal_value_std current_Appraisal_value_skew current_Appraisal_value_range current_Appraisal_value_kurtosis current_Appraisal_value_maxtoMin current_Appraisal_value_meanAD current_Appraisal_value_mad current_Appraisal_value_abs_max current_Appraisal_value_abs_min current_Appraisal_value_abs_avg remaining_outstanding_mean remaining_outstanding_median remaining_outstanding_max remaining_outstanding_min remaining_outstanding_std remaining_outstanding_skew remaining_outstanding_range remaining_outstanding_kurtosis remaining_outstanding_maxtoMin remaining_outstanding_meanAD remaining_outstanding_mad remaining_outstanding_abs_max remaining_outstanding_abs_min remaining_outstanding_abs_avg CustomerID Starting_Instalment Maturity_Period Asset_type Urban_Development Villa_House Investment_SelfOccupied Starting_outstanding Starting_Loan_to_Appraisedvalu_Percent StartingInterestrate RealEstate_Starting_Inflation age Salary ProfessionalLicensure UtilitySpending eCommerceAccount SocialMediaAccount Appraisal_value NoOfProperties CreditRiskScore payment_z_score payment_lenght
0 36.500 36.500 48 25 7.763 0.000 23 -1.571 1.920 1.211 1.000 48 25 36.500 34807.537 32989.385 41303.420 29087.210 4632.101 0.327 12216.210 -1.670 1.420 642.958 380.600 41303.420 29087.210 35195.315 25.984 26.055 27.829 24.484 1.007 0.026 3.345 -0.908 1.137 0.541 0.424 27.829 24.484 26.156 9.200 9.200 9.200 9.200 0.000 0.000 0.000 0.000 1.000 0.000 0.000 9.200 9.200 9.200 1.807 1.672 2.263 1.464 0.299 0.476 0.798 -1.568 1.545 0.052 0.046 2.263 1.464 1.864 1.746 1.772 3.029 -0.314 0.935 -0.803 3.344 0.412 -9.634 0.656 0.492 3.029 0.241 1.635 7.050 7.050 10.000 4.400 2.343 0.036 5.600 -2.070 2.273 0.421 0.200 10.000 4.400 7.200 134633.839 124575.695 168596.837 109112.231 22269.436 0.476 59484.606 -1.568 1.545 3879.738 3457.021 168596.837 109112.231 138854.534 10192.463 12010.615 15912.790 3696.580 4632.101 -0.327 12216.210 -1.670 4.305 642.958 380.600 15912.790 3696.580 9804.685 C12116 25 113 No shred services No Yes Self Occupancy 45000.000 69.400 9.200 0.870 50 1238.006 0 492.726 1 No 64841.499 2 711 -0.953 20
1 29.500 29.500 34 25 3.028 0.000 9 -1.200 1.360 1.000 1.000 34 25 29.500 161086.428 160942.530 162452.330 160044.340 781.525 0.460 2407.990 -0.659 1.015 267.554 280.920 162452.330 160044.340 161248.335 62.596 59.531 73.431 57.099 6.122 0.869 16.333 -0.929 1.286 1.815 1.223 73.431 57.099 65.265 9.688 10.500 10.880 7.500 1.541 -0.898 3.380 -1.333 1.451 0.487 0.005 10.880 7.500 9.190 2.064 2.151 2.263 1.734 0.198 -0.707 0.529 -1.233 1.305 0.059 0.046 2.263 1.734 1.998 1.609 1.772 2.899 -0.314 0.927 -0.822 3.214 0.840 -9.220 0.636 0.581 2.899 0.314 1.607 4.810 4.700 5.800 4.400 0.396 1.952 1.400 4.594 1.318 0.211 0.200 5.800 4.400 5.100 259530.977 270380.129 284512.065 217951.153 24948.281 -0.707 66560.912 -1.233 1.305 7395.657 5833.824 284512.065 217951.153 251231.609 3413.572 3557.470 4455.660 2047.670 781.525 -0.460 2407.990 -0.659 2.176 267.554 280.920 4455.660 2047.670 3251.665 C12117 25 138 No shred services No Yes Self Occupancy 164500.000 70.000 7.500 1.869 38 1633.000 1 477.870 1 Yes 235000.000 1 767 -1.842 10
2 43.000 44.000 60 25 11.039 -0.136 35 -1.276 2.400 1.167 1.000 60 25 42.500 38846.072 39134.850 41740.320 34562.300 2256.890 -0.392 7178.020 -1.086 1.208 239.267 221.695 41740.320 34562.300 38151.310 20.430 20.484 24.220 16.898 2.658 0.039 7.322 -1.719 1.433 0.609 0.591 24.220 16.898 20.559 12.960 12.125 15.000 12.125 1.224 0.903 2.875 -1.129 1.237 0.133 0.000 15.000 12.125 13.562 1.793 1.800 2.263 1.464 0.250 0.607 0.798 -0.824 1.545 0.048 0.040 2.263 1.464 1.864 1.873 1.876 3.029 -0.314 0.862 -0.904 3.344 0.582 -9.634 0.616 0.478 3.029 0.241 1.635 7.029 7.200 10.000 4.400 1.945 0.062 5.600 -1.545 2.273 0.353 0.200 10.000 4.400 7.200 193146.892 193921.317 243832.054 157802.838 26916.799 0.607 86029.215 -0.824 1.545 5163.118 4288.531 243832.054 157802.838 200817.446 5153.928 4865.150 9437.700 2259.680 2256.890 0.392 7178.020 -1.086 4.177 239.267 221.695 9437.700 2259.680 5848.690 C12118 25 105 No shred services No Yes Self Occupancy 44000.000 52.500 13.000 0.778 49 1489.000 0 351.594 1 Yes 83809.524 1 827 -0.064 31
3 43.000 44.000 60 25 11.039 -0.136 35 -1.276 2.400 1.167 1.000 60 25 42.500 30473.177 30594.560 32849.740 27355.730 1711.629 -0.249 5494.010 -1.162 1.201 183.134 161.585 32849.740 27355.730 30102.735 22.122 22.146 26.186 18.467 2.808 0.046 7.719 -1.717 1.418 0.646 0.621 26.186 18.467 22.326 12.350 12.350 12.350 12.350 0.000 0.000 0.000 0.000 1.000 0.000 0.000 12.350 12.350 12.350 1.793 1.800 2.263 1.464 0.250 0.607 0.798 -0.824 1.545 0.048 0.040 2.263 1.464 1.864 1.873 1.876 3.029 -0.314 0.862 -0.904 3.344 0.582 -9.634 0.616 0.478 3.029 0.241 1.635 7.029 7.200 10.000 4.400 1.945 0.062 5.600 -1.545 2.273 0.353 0.200 10.000 4.400 7.200 139887.665 140448.546 176596.663 114289.546 19494.635 0.607 62307.117 -0.824 1.545 3739.416 3105.991 176596.663 114289.546 145443.104 4326.823 4205.440 7444.270 1950.260 1711.629 0.249 5494.010 -1.162 3.817 183.134 161.585 7444.270 1950.260 4697.265 C12119 25 106 Complex With shared services No No Self Occupancy 34800.000 56.900 12.350 0.784 43 1241.000 0 403.590 1 No 61159.930 1 795 -0.064 31
4 43.000 44.000 60 25 11.039 -0.136 35 -1.276 2.400 1.167 1.000 60 25 42.500 50557.092 50619.550 56330.070 43102.830 4026.425 -0.146 13227.240 -1.126 1.307 448.893 366.360 56330.070 43102.830 49716.450 23.319 23.981 27.352 18.516 2.946 -0.082 8.836 -1.604 1.477 0.688 0.558 27.352 18.516 22.934 8.363 9.250 11.375 4.000 2.819 -0.707 7.375 -1.144 2.844 0.321 0.000 11.375 4.000 7.688 1.793 1.800 2.263 1.464 0.250 0.607 0.798 -0.824 1.545 0.048 0.040 2.263 1.464 1.864 1.873 1.876 3.029 -0.314 0.862 -0.904 3.344 0.582 -9.634 0.616 0.478 3.029 0.241 1.635 7.029 7.200 10.000 4.400 1.945 0.062 5.600 -1.545 2.273 0.353 0.200 10.000 4.400 7.200 219821.312 220702.687 277506.313 179596.091 30634.125 0.607 97910.221 -0.824 1.545 5876.167 4880.795 277506.313 179596.091 228551.202 11042.908 10980.450 18497.170 5269.930 4026.425 0.146 13227.240 -1.126 3.510 448.893 366.360 18497.170 5269.930 11883.550 C12120 25 107 No shred services No Yes Self Occupancy 61600.000 64.300 9.250 0.781 44 1074.000 1 406.066 1 No 95800.933 1 827 -0.064 31


CreditRiskScore

In [97]:
sns.distplot(train_data_1['CreditRiskScore'] , fit=norm);

# Get the fitted parameters used by the function
(mu, sigma) = norm.fit(train_data_1['CreditRiskScore'])
print( '\n mu = {:.2f} and sigma = {:.2f}\n'.format(mu, sigma))

#Now plot the distribution
plt.legend(['Normal dist. ($\mu=$ {:.2f} and $\sigma=$ {:.2f} )'.format(mu, sigma)],
            loc='best')
plt.ylabel('Frequency')
plt.title('credit score distribution')

#Get also the QQ-plot
fig = plt.figure()
res = stats.probplot(train_data_1['CreditRiskScore'], plot=plt)
plt.show();
 mu = 627.46 and sigma = 148.09

In [98]:
sns.distplot(np.log1p(train_data_1['CreditRiskScore']) , fit=norm);

# Get the fitted parameters used by the function
(mu, sigma) = norm.fit(np.log1p(train_data_1['CreditRiskScore']))
print( '\n mu = {:.2f} and sigma = {:.2f}\n'.format(mu, sigma))

#Now plot the distribution
plt.legend(['Normal dist. ($\mu=$ {:.2f} and $\sigma=$ {:.2f} )'.format(mu, sigma)],
            loc='best')
plt.ylabel('Frequency')
plt.title('log(credit score+1) distribution')

#Get also the QQ-plot
fig = plt.figure()
res = stats.probplot(np.log1p(train_data_1['CreditRiskScore']), plot=plt)
plt.show();
 mu = 6.41 and sigma = 0.25


Correlation Plot

In [99]:
a=train_data_1[numerical_features]
corr_with_credit_score = a.corr()['CreditRiskScore'].sort_values(ascending=False)
plt.figure(figsize=(25,10))
corr_with_credit_score.drop('CreditRiskScore').plot.bar()
plt.show();
In [100]:
train_data_1['CreditRiskScore'] = np.log1p(train_data_1['CreditRiskScore'])

#log transform skewed numeric features:
numeric_feats = train_data_1.dtypes[train_data_1.dtypes != "object"].index

skewed_feats = train_data_1[numeric_feats].apply(lambda x: skew(x.dropna())) #compute skewness
skewed_feats = skewed_feats[skewed_feats > 0.75]
skewed_feats = skewed_feats.index

train_data_1[skewed_feats] = np.log1p(train_data_1[skewed_feats])
In [101]:
train_data_1[numeric_feats] = train_data_1[numeric_feats].apply(lambda x:pd.to_numeric(x)) #
In [102]:
train_data_1.drop(axis=1,columns='CustomerID',inplace=True)
In [103]:
df=train_data_1
In [104]:
df.head(5)
Out[104]:
Current_Instalment_Sequence_mean Current_Instalment_Sequence_median Current_Instalment_Sequence_max Current_Instalment_Sequence_min Current_Instalment_Sequence_std Current_Instalment_Sequence_skew Current_Instalment_Sequence_range Current_Instalment_Sequence_kurtosis Current_Instalment_Sequence_maxtoMin Current_Instalment_Sequence_meanAD Current_Instalment_Sequence_mad Current_Instalment_Sequence_abs_max Current_Instalment_Sequence_abs_min Current_Instalment_Sequence_abs_avg Current_Outstanding_mean Current_Outstanding_median Current_Outstanding_max Current_Outstanding_min Current_Outstanding_std Current_Outstanding_skew Current_Outstanding_range Current_Outstanding_kurtosis Current_Outstanding_maxtoMin Current_Outstanding_meanAD Current_Outstanding_mad Current_Outstanding_abs_max Current_Outstanding_abs_min Current_Outstanding_abs_avg Current_Loan_to_Appraisedvalu_Percent_mean Current_Loan_to_Appraisedvalu_Percent_median Current_Loan_to_Appraisedvalu_Percent_max Current_Loan_to_Appraisedvalu_Percent_min Current_Loan_to_Appraisedvalu_Percent_std Current_Loan_to_Appraisedvalu_Percent_skew Current_Loan_to_Appraisedvalu_Percent_range Current_Loan_to_Appraisedvalu_Percent_kurtosis Current_Loan_to_Appraisedvalu_Percent_maxtoMin Current_Loan_to_Appraisedvalu_Percent_meanAD Current_Loan_to_Appraisedvalu_Percent_mad Current_Loan_to_Appraisedvalu_Percent_abs_max Current_Loan_to_Appraisedvalu_Percent_abs_min Current_Loan_to_Appraisedvalu_Percent_abs_avg CurrentInterestrate_mean CurrentInterestrate_median CurrentInterestrate_max CurrentInterestrate_min CurrentInterestrate_std CurrentInterestrate_skew CurrentInterestrate_range CurrentInterestrate_kurtosis CurrentInterestrate_maxtoMin CurrentInterestrate_meanAD CurrentInterestrate_mad CurrentInterestrate_abs_max CurrentInterestrate_abs_min CurrentInterestrate_abs_avg RealEstate_Current_Inflation_mean RealEstate_Current_Inflation_median RealEstate_Current_Inflation_max RealEstate_Current_Inflation_min RealEstate_Current_Inflation_std RealEstate_Current_Inflation_skew RealEstate_Current_Inflation_range RealEstate_Current_Inflation_kurtosis RealEstate_Current_Inflation_maxtoMin RealEstate_Current_Inflation_meanAD RealEstate_Current_Inflation_mad RealEstate_Current_Inflation_abs_max RealEstate_Current_Inflation_abs_min RealEstate_Current_Inflation_abs_avg GDP_mean GDP_median GDP_max GDP_min GDP_std GDP_skew GDP_range GDP_kurtosis GDP_maxtoMin GDP_meanAD GDP_mad GDP_abs_max GDP_abs_min GDP_abs_avg UnemploymentRate_mean UnemploymentRate_median UnemploymentRate_max UnemploymentRate_min UnemploymentRate_std UnemploymentRate_skew UnemploymentRate_range UnemploymentRate_kurtosis UnemploymentRate_maxtoMin UnemploymentRate_meanAD UnemploymentRate_mad UnemploymentRate_abs_max UnemploymentRate_abs_min UnemploymentRate_abs_avg current_Appraisal_value_mean current_Appraisal_value_median current_Appraisal_value_max current_Appraisal_value_min current_Appraisal_value_std current_Appraisal_value_skew current_Appraisal_value_range current_Appraisal_value_kurtosis current_Appraisal_value_maxtoMin current_Appraisal_value_meanAD current_Appraisal_value_mad current_Appraisal_value_abs_max current_Appraisal_value_abs_min current_Appraisal_value_abs_avg remaining_outstanding_mean remaining_outstanding_median remaining_outstanding_max remaining_outstanding_min remaining_outstanding_std remaining_outstanding_skew remaining_outstanding_range remaining_outstanding_kurtosis remaining_outstanding_maxtoMin remaining_outstanding_meanAD remaining_outstanding_mad remaining_outstanding_abs_max remaining_outstanding_abs_min remaining_outstanding_abs_avg Starting_Instalment Maturity_Period Asset_type Urban_Development Villa_House Investment_SelfOccupied Starting_outstanding Starting_Loan_to_Appraisedvalu_Percent StartingInterestrate RealEstate_Starting_Inflation age Salary ProfessionalLicensure UtilitySpending eCommerceAccount SocialMediaAccount Appraisal_value NoOfProperties CreditRiskScore payment_z_score payment_lenght
0 36.500 36.500 48 25 7.763 0.000 23 nan 1.072 0.793 0.693 48 25 36.500 10.458 10.404 10.629 10.278 8.441 0.327 9.411 nan 1.420 6.468 5.944 10.629 10.278 10.469 25.984 26.055 27.829 24.484 0.697 0.026 1.469 -2.390 1.137 0.432 0.424 27.829 24.484 26.156 9.200 9.200 2.322 9.200 0.000 0.000 0.000 0.000 0.693 0.000 0.000 2.322 9.200 2.322 1.807 1.672 2.263 0.902 0.299 0.476 0.798 nan 1.545 0.051 0.045 2.263 0.902 1.864 1.010 1.020 3.029 -0.378 0.935 -1.626 3.344 0.412 nan 0.656 0.492 3.029 0.216 0.969 7.050 7.050 10.000 1.686 2.343 0.035 5.600 nan 2.273 0.351 0.200 10.000 1.686 7.200 11.810 11.733 12.035 11.600 10.011 0.476 10.993 nan 1.545 8.264 8.148 12.035 11.600 11.841 9.230 9.394 9.675 8.215 8.441 -0.397 9.411 nan 4.305 6.468 5.944 9.675 8.215 9.191 25 113 No shred services No Yes Self Occupancy 10.714 69.400 9.200 0.870 3.932 7.122 0 492.726 1 No 11.080 1.099 6.568 -0.953 20
1 29.500 29.500 34 25 3.028 0.000 9 nan 0.859 0.693 0.693 34 25 29.500 11.990 11.989 11.998 11.983 6.663 0.460 7.787 -1.076 1.015 5.593 5.642 11.998 11.983 11.991 62.596 59.531 73.431 57.099 1.963 0.869 2.853 -2.642 1.286 1.035 1.223 73.431 57.099 65.265 9.688 10.500 2.475 7.500 0.932 -2.279 1.477 nan 0.896 0.397 0.005 2.475 7.500 2.321 2.064 2.151 2.263 1.006 0.198 -0.707 0.529 nan 1.305 0.057 0.045 2.263 1.006 1.998 0.959 1.020 2.899 -0.378 0.927 -1.725 3.214 0.840 nan 0.636 0.581 2.899 0.273 0.958 4.810 4.700 5.800 1.686 0.396 1.083 1.400 1.722 1.318 0.192 0.200 5.800 1.686 5.100 12.467 12.508 12.559 12.292 10.125 -0.707 11.106 nan 1.305 8.909 8.672 12.559 12.292 12.434 8.136 8.177 8.402 7.625 6.663 -0.616 7.787 -1.076 2.176 5.593 5.642 8.402 7.625 8.087 25 138 No shred services No Yes Self Occupancy 12.011 70.000 7.500 1.869 3.664 7.399 1 477.870 1 Yes 12.367 0.693 6.644 -1.842 10
2 43.000 44.000 60 25 11.039 -0.147 35 nan 1.224 0.773 0.693 60 25 42.500 10.567 10.575 10.639 10.451 7.722 -0.392 8.879 nan 1.208 5.482 5.406 10.639 10.451 10.549 20.430 20.484 24.220 16.898 1.297 0.039 2.119 nan 1.433 0.476 0.591 24.220 16.898 20.559 12.960 12.125 2.773 12.125 0.800 0.643 1.355 nan 0.805 0.125 0.000 2.773 12.125 2.678 1.793 1.800 2.263 0.902 0.250 0.607 0.798 -1.735 1.545 0.047 0.039 2.263 0.902 1.864 1.055 1.056 3.029 -0.378 0.862 -2.343 3.344 0.582 nan 0.616 0.478 3.029 0.216 0.969 7.029 7.200 10.000 1.686 1.945 0.060 5.600 nan 2.273 0.303 0.200 10.000 1.686 7.200 12.171 12.175 12.404 11.969 10.201 0.607 11.362 -1.735 1.545 8.549 8.364 12.404 11.969 12.210 8.548 8.490 9.153 7.723 7.722 0.331 8.879 nan 4.177 5.482 5.406 9.153 7.723 8.674 25 105 No shred services No Yes Self Occupancy 10.692 52.500 13.000 0.778 3.912 7.307 0 351.594 1 Yes 11.336 0.693 6.719 -0.064 31
3 43.000 44.000 60 25 11.039 -0.147 35 nan 1.224 0.773 0.693 60 25 42.500 10.325 10.329 10.400 10.217 7.446 -0.249 8.612 nan 1.201 5.216 5.091 10.400 10.217 10.312 22.122 22.146 26.186 18.467 1.337 0.046 2.166 nan 1.418 0.498 0.621 26.186 18.467 22.326 12.350 12.350 2.592 12.350 0.000 0.000 0.000 0.000 0.693 0.000 0.000 2.592 12.350 2.592 1.793 1.800 2.263 0.902 0.250 0.607 0.798 -1.735 1.545 0.047 0.039 2.263 0.902 1.864 1.055 1.056 3.029 -0.378 0.862 -2.343 3.344 0.582 nan 0.616 0.478 3.029 0.216 0.969 7.029 7.200 10.000 1.686 1.945 0.060 5.600 nan 2.273 0.303 0.200 10.000 1.686 7.200 11.849 11.853 12.082 11.646 9.878 0.607 11.040 -1.735 1.545 8.227 8.041 12.082 11.646 11.888 8.373 8.344 8.915 7.576 7.446 0.222 8.612 nan 3.817 5.216 5.091 8.915 7.576 8.455 25 106 Complex With shared services No No Self Occupancy 10.457 56.900 12.350 0.784 3.784 7.124 0 403.590 1 No 11.021 0.693 6.680 -0.064 31
4 43.000 44.000 60 25 11.039 -0.147 35 nan 1.224 0.773 0.693 60 25 42.500 10.831 10.832 10.939 10.671 8.301 -0.146 9.490 nan 1.307 6.109 5.906 10.939 10.671 10.814 23.319 23.981 27.352 18.516 1.373 -0.082 2.286 nan 1.477 0.524 0.558 27.352 18.516 22.934 8.363 9.250 2.516 4.000 1.340 -1.229 2.125 nan 1.346 0.278 0.000 2.516 4.000 2.162 1.793 1.800 2.263 0.902 0.250 0.607 0.798 -1.735 1.545 0.047 0.039 2.263 0.902 1.864 1.055 1.056 3.029 -0.378 0.862 -2.343 3.344 0.582 nan 0.616 0.478 3.029 0.216 0.969 7.029 7.200 10.000 1.686 1.945 0.060 5.600 nan 2.273 0.303 0.200 10.000 1.686 7.200 12.301 12.305 12.534 12.098 10.330 0.607 11.492 -1.735 1.545 8.679 8.493 12.534 12.098 12.340 9.310 9.304 9.825 8.570 8.301 0.136 9.490 nan 3.510 6.109 5.906 9.825 8.570 9.383 25 107 No shred services No Yes Self Occupancy 11.028 64.300 9.250 0.781 3.807 6.980 1 406.066 1 No 11.470 0.693 6.719 -0.064 31
In [105]:
df = pd.get_dummies(df,drop_first=True)
df = df.fillna(df.mean())
In [106]:
df.isna().sum()
Out[106]:
Current_Instalment_Sequence_mean                  0
Current_Instalment_Sequence_median                0
Current_Instalment_Sequence_max                   0
Current_Instalment_Sequence_min                   0
Current_Instalment_Sequence_std                   0
Current_Instalment_Sequence_skew                  0
Current_Instalment_Sequence_range                 0
Current_Instalment_Sequence_kurtosis              0
Current_Instalment_Sequence_maxtoMin              0
Current_Instalment_Sequence_meanAD                0
Current_Instalment_Sequence_mad                   0
Current_Instalment_Sequence_abs_max               0
Current_Instalment_Sequence_abs_min               0
Current_Instalment_Sequence_abs_avg               0
Current_Outstanding_mean                          0
Current_Outstanding_median                        0
Current_Outstanding_max                           0
Current_Outstanding_min                           0
Current_Outstanding_std                           0
Current_Outstanding_skew                          0
Current_Outstanding_range                         0
Current_Outstanding_kurtosis                      0
Current_Outstanding_maxtoMin                      0
Current_Outstanding_meanAD                        0
Current_Outstanding_mad                           0
Current_Outstanding_abs_max                       0
Current_Outstanding_abs_min                       0
Current_Outstanding_abs_avg                       0
Current_Loan_to_Appraisedvalu_Percent_mean        0
Current_Loan_to_Appraisedvalu_Percent_median      0
Current_Loan_to_Appraisedvalu_Percent_max         0
Current_Loan_to_Appraisedvalu_Percent_min         0
Current_Loan_to_Appraisedvalu_Percent_std         0
Current_Loan_to_Appraisedvalu_Percent_skew        0
Current_Loan_to_Appraisedvalu_Percent_range       0
Current_Loan_to_Appraisedvalu_Percent_kurtosis    0
Current_Loan_to_Appraisedvalu_Percent_maxtoMin    0
Current_Loan_to_Appraisedvalu_Percent_meanAD      0
Current_Loan_to_Appraisedvalu_Percent_mad         0
Current_Loan_to_Appraisedvalu_Percent_abs_max     0
Current_Loan_to_Appraisedvalu_Percent_abs_min     0
Current_Loan_to_Appraisedvalu_Percent_abs_avg     0
CurrentInterestrate_mean                          0
CurrentInterestrate_median                        0
CurrentInterestrate_max                           0
CurrentInterestrate_min                           0
CurrentInterestrate_std                           0
CurrentInterestrate_skew                          0
CurrentInterestrate_range                         0
CurrentInterestrate_kurtosis                      0
CurrentInterestrate_maxtoMin                      0
CurrentInterestrate_meanAD                        0
CurrentInterestrate_mad                           0
CurrentInterestrate_abs_max                       0
CurrentInterestrate_abs_min                       0
CurrentInterestrate_abs_avg                       0
RealEstate_Current_Inflation_mean                 0
RealEstate_Current_Inflation_median               0
RealEstate_Current_Inflation_max                  0
RealEstate_Current_Inflation_min                  0
RealEstate_Current_Inflation_std                  0
RealEstate_Current_Inflation_skew                 0
RealEstate_Current_Inflation_range                0
RealEstate_Current_Inflation_kurtosis             0
RealEstate_Current_Inflation_maxtoMin             0
RealEstate_Current_Inflation_meanAD               0
RealEstate_Current_Inflation_mad                  0
RealEstate_Current_Inflation_abs_max              0
RealEstate_Current_Inflation_abs_min              0
RealEstate_Current_Inflation_abs_avg              0
GDP_mean                                          0
GDP_median                                        0
GDP_max                                           0
GDP_min                                           0
GDP_std                                           0
GDP_skew                                          0
GDP_range                                         0
GDP_kurtosis                                      0
GDP_maxtoMin                                      0
GDP_meanAD                                        0
GDP_mad                                           0
GDP_abs_max                                       0
GDP_abs_min                                       0
GDP_abs_avg                                       0
UnemploymentRate_mean                             0
UnemploymentRate_median                           0
UnemploymentRate_max                              0
UnemploymentRate_min                              0
UnemploymentRate_std                              0
UnemploymentRate_skew                             0
UnemploymentRate_range                            0
UnemploymentRate_kurtosis                         0
UnemploymentRate_maxtoMin                         0
UnemploymentRate_meanAD                           0
UnemploymentRate_mad                              0
UnemploymentRate_abs_max                          0
UnemploymentRate_abs_min                          0
UnemploymentRate_abs_avg                          0
current_Appraisal_value_mean                      0
current_Appraisal_value_median                    0
current_Appraisal_value_max                       0
current_Appraisal_value_min                       0
current_Appraisal_value_std                       0
current_Appraisal_value_skew                      0
current_Appraisal_value_range                     0
current_Appraisal_value_kurtosis                  0
current_Appraisal_value_maxtoMin                  0
current_Appraisal_value_meanAD                    0
current_Appraisal_value_mad                       0
current_Appraisal_value_abs_max                   0
current_Appraisal_value_abs_min                   0
current_Appraisal_value_abs_avg                   0
remaining_outstanding_mean                        0
remaining_outstanding_median                      0
remaining_outstanding_max                         0
remaining_outstanding_min                         0
remaining_outstanding_std                         0
remaining_outstanding_skew                        0
remaining_outstanding_range                       0
remaining_outstanding_kurtosis                    0
remaining_outstanding_maxtoMin                    0
remaining_outstanding_meanAD                      0
remaining_outstanding_mad                         0
remaining_outstanding_abs_max                     0
remaining_outstanding_abs_min                     0
remaining_outstanding_abs_avg                     0
Starting_Instalment                               0
Maturity_Period                                   0
Starting_outstanding                              0
Starting_Loan_to_Appraisedvalu_Percent            0
StartingInterestrate                              0
RealEstate_Starting_Inflation                     0
age                                               0
Salary                                            0
UtilitySpending                                   0
Appraisal_value                                   0
NoOfProperties                                    0
CreditRiskScore                                   0
payment_z_score                                   0
payment_lenght                                    0
Asset_type_No shred services                      0
Urban_Development_Yes                             0
Villa_House_Yes                                   0
Investment_SelfOccupied_Self Occupancy            0
ProfessionalLicensure_1                           0
eCommerceAccount_1                                0
SocialMediaAccount_Yes                            0
dtype: int64
Remove na From Dataset
In [107]:
import pandas as pd

def clean_dataset(df):
    assert isinstance(df, pd.DataFrame), "df needs to be a pd.DataFrame"
    df.dropna(inplace=True)
    indices_to_keep = ~df.isin([np.nan, np.inf, -np.inf]).any(1)
    return df[indices_to_keep].astype(np.float64)
In [108]:
df1=clean_dataset(df)
In [109]:
df1.shape
Out[109]:
(9894, 147)


Split Data

In [110]:
#X, y = df.drop(['CreditRiskScore'], axis = 1), df['CreditRiskScore']

#X_train, X_test, y_train, y_test  = train_test_split(X, y, test_size = 0.2, random_state = 0)
y= df1['CreditRiskScore']
X=df1.drop(['CreditRiskScore'], axis = 1)
from sklearn.model_selection import train_test_split  
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)  


BaseLine Model

1 MLR

In [112]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
y=y_train
X=X_train
model = smf.OLS(y,X).fit()

predictions = model.predict(X)

print(model.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:        CreditRiskScore   R-squared:                       0.999
Model:                            OLS   Adj. R-squared:                  0.999
Method:                 Least Squares   F-statistic:                 1.090e+05
Date:                Fri, 17 May 2019   Prob (F-statistic):               0.00
Time:                        12:54:33   Log-Likelihood:                 3409.8
No. Observations:                7915   AIC:                            -6582.
Df Residuals:                    7796   BIC:                            -5751.
Df Model:                         119                                         
Covariance Type:            nonrobust                                         
==================================================================================================================
                                                     coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------------------------
Current_Instalment_Sequence_mean                   0.0963      0.033      2.928      0.003       0.032       0.161
Current_Instalment_Sequence_median                -0.0133      0.006     -2.165      0.030      -0.025      -0.001
Current_Instalment_Sequence_max                   -0.0219      0.010     -2.097      0.036      -0.042      -0.001
Current_Instalment_Sequence_min                   -0.0213      0.005     -3.942      0.000      -0.032      -0.011
Current_Instalment_Sequence_std                   -0.0343      0.045     -0.769      0.442      -0.122       0.053
Current_Instalment_Sequence_skew                   0.0366      0.037      0.989      0.323      -0.036       0.109
Current_Instalment_Sequence_range                 -0.0006      0.009     -0.061      0.951      -0.019       0.018
Current_Instalment_Sequence_kurtosis              -0.0022      0.006     -0.340      0.734      -0.015       0.010
Current_Instalment_Sequence_maxtoMin               0.0110      0.126      0.088      0.930      -0.236       0.258
Current_Instalment_Sequence_meanAD                 0.2997      0.378      0.793      0.428      -0.441       1.040
Current_Instalment_Sequence_mad                   -0.6746      0.423     -1.596      0.111      -1.503       0.154
Current_Instalment_Sequence_abs_max               -0.0219      0.010     -2.097      0.036      -0.042      -0.001
Current_Instalment_Sequence_abs_min               -0.0213      0.005     -3.941      0.000      -0.032      -0.011
Current_Instalment_Sequence_abs_avg               -0.0216      0.007     -3.145      0.002      -0.035      -0.008
Current_Outstanding_mean                           0.0056      0.181      0.031      0.975      -0.349       0.360
Current_Outstanding_median                         0.1313      0.093      1.417      0.157      -0.050       0.313
Current_Outstanding_max                            0.1687      0.056      3.016      0.003       0.059       0.278
Current_Outstanding_min                            0.0291      0.011      2.753      0.006       0.008       0.050
Current_Outstanding_std                            0.0157      0.011      1.380      0.167      -0.007       0.038
Current_Outstanding_skew                          -0.0024      0.005     -0.507      0.612      -0.012       0.007
Current_Outstanding_range                         -0.0060      0.014     -0.438      0.661      -0.033       0.021
Current_Outstanding_kurtosis                       0.0007      0.001      0.532      0.595      -0.002       0.003
Current_Outstanding_maxtoMin                       0.0019      0.001      1.866      0.062   -9.64e-05       0.004
Current_Outstanding_meanAD                        -0.0164      0.007     -2.350      0.019      -0.030      -0.003
Current_Outstanding_mad                           -0.0008      0.001     -0.830      0.407      -0.003       0.001
Current_Outstanding_abs_max                        0.1687      0.056      3.016      0.003       0.059       0.278
Current_Outstanding_abs_min                        0.0291      0.011      2.753      0.006       0.008       0.050
Current_Outstanding_abs_avg                       -0.2513      0.151     -1.669      0.095      -0.546       0.044
Current_Loan_to_Appraisedvalu_Percent_mean         0.0034      0.003      1.010      0.312      -0.003       0.010
Current_Loan_to_Appraisedvalu_Percent_median      -0.0027      0.002     -1.623      0.105      -0.006       0.001
Current_Loan_to_Appraisedvalu_Percent_max          0.0011      0.001      1.617      0.106      -0.000       0.002
Current_Loan_to_Appraisedvalu_Percent_min         -0.0041      0.001     -7.993      0.000      -0.005      -0.003
Current_Loan_to_Appraisedvalu_Percent_std          0.1262      0.038      3.337      0.001       0.052       0.200
Current_Loan_to_Appraisedvalu_Percent_skew         0.0277      0.011      2.532      0.011       0.006       0.049
Current_Loan_to_Appraisedvalu_Percent_range       -0.3944      0.037    -10.753      0.000      -0.466      -0.322
Current_Loan_to_Appraisedvalu_Percent_kurtosis     0.0020      0.003      0.721      0.471      -0.003       0.007
Current_Loan_to_Appraisedvalu_Percent_maxtoMin    -0.0019      0.001     -1.802      0.072      -0.004       0.000
Current_Loan_to_Appraisedvalu_Percent_meanAD       0.2825      0.042      6.691      0.000       0.200       0.365
Current_Loan_to_Appraisedvalu_Percent_mad         -0.0207      0.008     -2.641      0.008      -0.036      -0.005
Current_Loan_to_Appraisedvalu_Percent_abs_max      0.0011      0.001      1.616      0.106      -0.000       0.002
Current_Loan_to_Appraisedvalu_Percent_abs_min     -0.0041      0.001     -7.993      0.000      -0.005      -0.003
Current_Loan_to_Appraisedvalu_Percent_abs_avg     -0.0015      0.000     -3.290      0.001      -0.002      -0.001
CurrentInterestrate_mean                           0.0085      0.010      0.818      0.413      -0.012       0.029
CurrentInterestrate_median                        -0.0059      0.005     -1.224      0.221      -0.015       0.004
CurrentInterestrate_max                           -0.0703      0.105     -0.672      0.502      -0.275       0.135
CurrentInterestrate_min                           -0.0024      0.005     -0.512      0.608      -0.012       0.007
CurrentInterestrate_std                           -0.0233      0.040     -0.590      0.555      -0.101       0.054
CurrentInterestrate_skew                           0.0091      0.006      1.568      0.117      -0.002       0.020
CurrentInterestrate_range                          0.0017      0.027      0.062      0.951      -0.051       0.055
CurrentInterestrate_kurtosis                      -0.0016      0.004     -0.445      0.657      -0.008       0.005
CurrentInterestrate_maxtoMin                       0.0461      0.057      0.815      0.415      -0.065       0.157
CurrentInterestrate_meanAD                         0.0188      0.044      0.428      0.669      -0.067       0.105
CurrentInterestrate_mad                            0.0701      0.055      1.263      0.207      -0.039       0.179
CurrentInterestrate_abs_max                       -0.0703      0.105     -0.672      0.502      -0.275       0.135
CurrentInterestrate_abs_min                       -0.0024      0.005     -0.512      0.608      -0.012       0.007
CurrentInterestrate_abs_avg                        0.1293      0.210      0.615      0.538      -0.283       0.541
RealEstate_Current_Inflation_mean                  2.6807      3.486      0.769      0.442      -4.152       9.514
RealEstate_Current_Inflation_median               -3.7052      0.927     -3.999      0.000      -5.521      -1.889
RealEstate_Current_Inflation_max                   0.2773      0.973      0.285      0.776      -1.630       2.185
RealEstate_Current_Inflation_min                   0.1845      5.035      0.037      0.971      -9.686      10.055
RealEstate_Current_Inflation_std                  -1.0911      0.919     -1.187      0.235      -2.892       0.710
RealEstate_Current_Inflation_skew                  0.0276      0.013      2.095      0.036       0.002       0.053
RealEstate_Current_Inflation_range                 0.0937      1.922      0.049      0.961      -3.673       3.861
RealEstate_Current_Inflation_kurtosis            -12.6563      6.331     -1.999      0.046     -25.067      -0.246
RealEstate_Current_Inflation_maxtoMin              3.3051      1.847      1.790      0.074      -0.315       6.925
RealEstate_Current_Inflation_meanAD                8.9826      4.947      1.816      0.069      -0.716      18.681
RealEstate_Current_Inflation_mad                  -1.5439      2.048     -0.754      0.451      -5.558       2.470
RealEstate_Current_Inflation_abs_max               0.2773      0.973      0.285      0.776      -1.630       2.185
RealEstate_Current_Inflation_abs_min               0.1845      5.035      0.037      0.971      -9.686      10.055
RealEstate_Current_Inflation_abs_avg               0.2304      1.867      0.123      0.902      -3.430       3.891
GDP_mean                                          -0.1793      0.169     -1.064      0.287      -0.510       0.151
GDP_median                                         0.3263      0.091      3.598      0.000       0.149       0.504
GDP_max                                           -0.1882      0.123     -1.535      0.125      -0.428       0.052
GDP_min                                            0.1524      0.080      1.907      0.057      -0.004       0.309
GDP_std                                           -0.0544      0.112     -0.485      0.628      -0.274       0.165
GDP_skew                                           0.0110      0.006      1.850      0.064      -0.001       0.023
GDP_range                                          0.4141      0.152      2.728      0.006       0.116       0.712
GDP_kurtosis                                       0.0143      0.009      1.512      0.131      -0.004       0.033
GDP_maxtoMin                                       0.2123      0.095      2.230      0.026       0.026       0.399
GDP_meanAD                                         0.2443      0.119      2.045      0.041       0.010       0.478
GDP_mad                                           -0.1608      0.073     -2.199      0.028      -0.304      -0.017
GDP_abs_max                                       -0.1882      0.123     -1.535      0.125      -0.428       0.052
GDP_abs_min                                        1.3098      0.384      3.408      0.001       0.556       2.063
GDP_abs_avg                                       -0.1667      0.708     -0.235      0.814      -1.555       1.222
UnemploymentRate_mean                             -0.1353      0.054     -2.493      0.013      -0.242      -0.029
UnemploymentRate_median                            0.0388      0.010      3.996      0.000       0.020       0.058
UnemploymentRate_max                               0.0850      0.065      1.309      0.191      -0.042       0.212
UnemploymentRate_min                              -0.7456      0.892     -0.836      0.403      -2.494       1.003
UnemploymentRate_std                              -0.0135      0.061     -0.222      0.825      -0.133       0.106
UnemploymentRate_skew                              0.0072      0.005      1.434      0.152      -0.003       0.017
UnemploymentRate_range                            -0.1417      0.131     -1.082      0.279      -0.398       0.115
UnemploymentRate_kurtosis                         -0.0007      0.003     -0.222      0.825      -0.007       0.005
UnemploymentRate_maxtoMin                         -0.0573      0.420     -0.136      0.892      -0.880       0.766
UnemploymentRate_meanAD                           -0.0474      0.286     -0.166      0.868      -0.608       0.513
UnemploymentRate_mad                              -0.0486      0.108     -0.452      0.652      -0.259       0.162
UnemploymentRate_abs_max                           0.0850      0.065      1.309      0.191      -0.042       0.212
UnemploymentRate_abs_min                          -0.7456      0.892     -0.836      0.403      -2.494       1.003
UnemploymentRate_abs_avg                           0.1558      0.123      1.268      0.205      -0.085       0.397
current_Appraisal_value_mean                      -5.5131      6.337     -0.870      0.384     -17.935       6.909
current_Appraisal_value_median                     6.8086      1.659      4.104      0.000       3.556      10.061
current_Appraisal_value_max                        4.3694      4.791      0.912      0.362      -5.022      13.761
current_Appraisal_value_min                       11.7121      8.432      1.389      0.165      -4.817      28.241
current_Appraisal_value_std                        0.1497      0.174      0.860      0.390      -0.192       0.491
current_Appraisal_value_skew                       0.0276      0.013      2.095      0.036       0.002       0.053
current_Appraisal_value_range                      0.1288      0.230      0.560      0.575      -0.322       0.580
current_Appraisal_value_kurtosis                  12.6430      6.329      1.998      0.046       0.236      25.050
current_Appraisal_value_maxtoMin                   3.3051      1.847      1.790      0.074      -0.315       6.925
current_Appraisal_value_meanAD                    -0.5862      0.270     -2.169      0.030      -1.116      -0.056
current_Appraisal_value_mad                        0.0239      0.084      0.286      0.775      -0.140       0.188
current_Appraisal_value_abs_max                    4.3694      4.791      0.912      0.362      -5.022      13.761
current_Appraisal_value_abs_min                   11.7121      8.432      1.389      0.165      -4.817      28.241
current_Appraisal_value_abs_avg                  -34.0039     25.844     -1.316      0.188     -84.665      16.657
remaining_outstanding_mean                         0.0233      0.006      4.052      0.000       0.012       0.035
remaining_outstanding_median                       0.0029      0.003      1.145      0.252      -0.002       0.008
remaining_outstanding_max                          0.0051      0.002      2.108      0.035       0.000       0.010
remaining_outstanding_min                          0.0065      0.003      2.072      0.038       0.000       0.013
remaining_outstanding_std                          0.0157      0.011      1.380      0.167      -0.007       0.038
remaining_outstanding_skew                         0.0026      0.006      0.433      0.665      -0.009       0.014
remaining_outstanding_range                       -0.0060      0.014     -0.438      0.661      -0.033       0.021
remaining_outstanding_kurtosis                     0.0007      0.001      0.532      0.595      -0.002       0.003
remaining_outstanding_maxtoMin                 -3.493e-09   3.21e-09     -1.088      0.277   -9.79e-09     2.8e-09
remaining_outstanding_meanAD                      -0.0164      0.007     -2.350      0.019      -0.030      -0.003
remaining_outstanding_mad                         -0.0008      0.001     -0.830      0.407      -0.003       0.001
remaining_outstanding_abs_max                     -0.1429      0.049     -2.933      0.003      -0.238      -0.047
remaining_outstanding_abs_min                     -0.0084      0.003     -3.281      0.001      -0.013      -0.003
remaining_outstanding_abs_avg                      0.1332      0.043      3.106      0.002       0.049       0.217
Starting_Instalment                                0.0223      0.013      1.742      0.082      -0.003       0.047
Maturity_Period                                   -0.0013      0.000     -6.658      0.000      -0.002      -0.001
Starting_outstanding                              -1.4179      0.135    -10.534      0.000      -1.682      -1.154
Starting_Loan_to_Appraisedvalu_Percent             0.0161      0.002      9.081      0.000       0.013       0.020
StartingInterestrate                               0.0003      0.001      0.386      0.699      -0.001       0.002
RealEstate_Starting_Inflation                     -0.4490      0.061     -7.368      0.000      -0.568      -0.330
age                                               -0.0372      0.032     -1.159      0.246      -0.100       0.026
Salary                                             0.0422      0.005      8.143      0.000       0.032       0.052
UtilitySpending                                 2.739e-05   1.99e-05      1.374      0.169   -1.17e-05    6.65e-05
Appraisal_value                                    1.9228      0.154     12.471      0.000       1.621       2.225
NoOfProperties                                    -0.0068      0.011     -0.590      0.555      -0.029       0.016
payment_z_score                                    0.0054      0.002      2.569      0.010       0.001       0.010
payment_lenght                                     0.0267      0.014      1.951      0.051      -0.000       0.053
Asset_type_No shred services                       0.0018      0.008      0.212      0.832      -0.015       0.018
Urban_Development_Yes                             -0.0044      0.007     -0.627      0.531      -0.018       0.009
Villa_House_Yes                                   -0.0026      0.005     -0.473      0.636      -0.013       0.008
Investment_SelfOccupied_Self Occupancy            -0.0183      0.009     -2.041      0.041      -0.036      -0.001
ProfessionalLicensure_1                            0.0006      0.005      0.119      0.905      -0.009       0.011
eCommerceAccount_1                                 0.0011      0.004      0.300      0.764      -0.006       0.008
SocialMediaAccount_Yes                            -0.0032      0.005     -0.702      0.483      -0.012       0.006
==============================================================================
Omnibus:                      436.963   Durbin-Watson:                   2.034
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              513.896
Skew:                          -0.595   Prob(JB):                    2.56e-112
Kurtosis:                       3.376   Cond. No.                     1.00e+16
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 2.79e-17. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
Observation-R-squared and Adj. R-squared shows very high
In [113]:
test_prediction = model.predict(X_test)
In [115]:
print('Train r2 score: ', r2_score(predictions, y_train))
print('Test r2 score: ', r2_score(y_test, test_prediction))
train_mse1 = mean_squared_error(predictions, y_train)
test_mse1 = mean_squared_error(y_test, test_prediction)
train_rmse1 = np.sqrt(train_mse1)
test_rmse1 = np.sqrt(test_mse1)
print('Train RMSE: %.4f' % train_rmse1)
print('Test RMSE: %.4f' % test_rmse1)
Train r2 score:  0.3780969375072847
Test r2 score:  0.6169382708225157
Train RMSE: 0.1573
Test RMSE: 0.1562
observation- model build is so simple as RMSE are same

2 Elastic Net

In [116]:
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import cross_val_score
from itertools import product

def rmse_cv(model):
    rmse= np.sqrt(-cross_val_score(model, X_train, y_train, scoring="mean_squared_error", cv = 5))
    return(rmse)
In [117]:
alphas = [0.0005, 0.001, 0.01, 0.03, 0.05, 0.1]
l1_ratios = [1.5, 1.1, 1, 0.9, 0.8, 0.7, 0.5]
In [118]:
cv_elastic = [rmse_cv(ElasticNet(alpha = alpha, l1_ratio=l1_ratio)).mean() 
            for (alpha, l1_ratio) in product(alphas, l1_ratios)]
In [121]:
elastic = ElasticNet(alpha=0.0005, l1_ratio=0.9)
elastic.fit(X_train, y_train)
Out[121]:
ElasticNet(alpha=0.0005, copy_X=True, fit_intercept=True, l1_ratio=0.9,
      max_iter=1000, normalize=False, positive=False, precompute=False,
      random_state=None, selection='cyclic', tol=0.0001, warm_start=False)
In [122]:
#let's look at the residuals as well:
matplotlib.rcParams['figure.figsize'] = (6.0, 6.0)

preds = pd.DataFrame({"preds":elastic.predict(X_train), "true":y_train})
preds["residuals"] = preds["true"] - preds["preds"]
preds.plot(x = "preds", y = "residuals",kind = "scatter")
Out[122]:
<matplotlib.axes._subplots.AxesSubplot at 0x29e18b68668>
In [123]:
rmse = np.sqrt(np.mean((preds['true']-preds['preds'])**2))
print ('RMSE: {0:.4f}'.format(rmse))
RMSE: 0.1626
In [124]:
from sklearn.metrics import r2_score
print('R^2 train: %.3f' %  r2_score(preds['true'], preds['preds']))
R^2 train: 0.590
In [125]:
coef = pd.Series(elastic.coef_, index = X_train.columns)
In [126]:
imp_coef = pd.concat([coef.sort_values().head(50),
                     coef.sort_values().tail(10)])
In [127]:
feature_importance = pd.Series(index = X_train.columns, data = np.abs(elastic.coef_))

n_selected_features = (feature_importance>0).sum()
print('{0:d} features, reduction of {1:2.2f}%'.format(
    n_selected_features,(1-n_selected_features/len(feature_importance))*100))

feature_importance.sort_values().tail(30).plot(kind = 'bar', figsize = (18,6))
71 features, reduction of 51.37%
Out[127]:
<matplotlib.axes._subplots.AxesSubplot at 0x29e200c6080>

As we want least RMSE lets move on Boosting models

3 XG boost

In [130]:
xgb_model1 = XGBRegressor()
xgb_model1.fit(X_train, y_train, verbose=False)
Out[130]:
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)
In [131]:
y_train_pred1 = xgb_model1.predict(X_train)
y_pred1 = xgb_model1.predict(X_test)

print('Train r2 score: ', r2_score(y_train_pred1, y_train))
print('Test r2 score: ', r2_score(y_test, y_pred1))
train_mse1 = mean_squared_error(y_train_pred1, y_train)
test_mse1 = mean_squared_error(y_pred1, y_test)
train_rmse1 = np.sqrt(train_mse1)
test_rmse1 = np.sqrt(test_mse1)
print('Train RMSE: %.4f' % train_rmse1)
print('Test RMSE: %.4f' % test_rmse1)
Train r2 score:  0.6057759351120758
Test r2 score:  0.714161701846405
Train RMSE: 0.1312
Test RMSE: 0.1350
In [132]:
xgb_model2 = XGBRegressor(n_estimators=1000)
xgb_model2.fit(X_train, y_train, early_stopping_rounds=5, 
             eval_set=[(X_test, y_test)], verbose=False)
y_train_pred2 = xgb_model2.predict(X_train)
y_pred2 = xgb_model2.predict(X_test)

print('Train r2 score: ', r2_score(y_train_pred2, y_train))
print('Test r2 score: ', r2_score(y_test, y_pred2))
train_mse2 = mean_squared_error(y_train_pred2, y_train)
test_mse2 = mean_squared_error(y_pred2, y_test)
train_rmse2 = np.sqrt(train_mse2)
test_rmse2 = np.sqrt(test_mse2)
print('Train RMSE: %.4f' % train_rmse2)
print('Test RMSE: %.4f' % test_rmse2)
Train r2 score:  0.6042846273061067
Test r2 score:  0.7143944385533139
Train RMSE: 0.1315
Test RMSE: 0.1349
In [133]:
xgb_model3 = XGBRegressor(n_estimators=1000, learning_rate=0.05)
xgb_model3.fit(X_train, y_train, early_stopping_rounds=5, 
             eval_set=[(X_test, y_test)], verbose=False)
y_train_pred3 = xgb_model3.predict(X_train)
y_pred3 = xgb_model3.predict(X_test)

print('Train r2 score: ', r2_score(y_train_pred3, y_train))
print('Test r2 score: ', r2_score(y_test, y_pred3))
train_mse3 = mean_squared_error(y_train_pred3, y_train)
test_mse3 = mean_squared_error(y_pred3, y_test)
train_rmse3 = np.sqrt(train_mse3)
test_rmse3 = np.sqrt(test_mse3)
print('Train RMSE: %.4f' % train_rmse3)
print('Test RMSE: %.4f' % test_rmse3)
Train r2 score:  0.5995142020220185
Test r2 score:  0.7166429035796307
Train RMSE: 0.1319
Test RMSE: 0.1344
In [134]:
xgb_model4 = XGBRegressor(n_estimators=100, learning_rate=0.08, gamma=0, subsample=0.75,
                         colsample_bytree=1, max_depth=7, n_jobs=-1)
xgb_model4.fit(X_train,y_train)
y_train_pred4 = xgb_model4.predict(X_train)
y_pred4 = xgb_model4.predict(X_test)

print('Train r2 score: ', r2_score(y_train_pred4, y_train))
print('Test r2 score: ', r2_score(y_test, y_pred4))
train_mse4 = mean_squared_error(y_train_pred4, y_train)
test_mse4 = mean_squared_error(y_pred4, y_test)
train_rmse4 = np.sqrt(train_mse4)
test_rmse4 = np.sqrt(test_mse4)
print('Train RMSE: %.4f' % train_rmse4)
print('Test RMSE: %.4f' % test_rmse4)
Train r2 score:  0.7963757378620433
Test r2 score:  0.733378321477149
Train RMSE: 0.0987
Test RMSE: 0.1303


ON LEADERBOARD SCORE- 117.35

In [148]:
feature_important = xgb_model4.feature_importances_
keys = list( X_train.columns)
values = list(feature_important)

data = pd.DataFrame(data=values, index=keys, columns=["score"]).sort_values(by = "score", ascending=True)
data.plot(kind='barh',figsize=(30,55),fontsize=25)
Out[148]:
<matplotlib.axes._subplots.AxesSubplot at 0x29e1adb6e48>

I concluded that xgb_model4 is the best model.

In [158]:
from collections import OrderedDict
OrderedDict(sorted(xgb_model4.get_booster().get_fscore().items(), key=lambda t: t[1], reverse=True))
Out[158]:
OrderedDict([('Current_Instalment_Sequence_mean', 380),
             ('Current_Loan_to_Appraisedvalu_Percent_min', 193),
             ('Current_Outstanding_mean', 170),
             ('Current_Outstanding_skew', 154),
             ('Current_Outstanding_maxtoMin', 152),
             ('Current_Loan_to_Appraisedvalu_Percent_mean', 136),
             ('Current_Outstanding_std', 113),
             ('Maturity_Period', 106),
             ('Current_Loan_to_Appraisedvalu_Percent_max', 105),
             ('remaining_outstanding_maxtoMin', 100),
             ('Salary', 100),
             ('Current_Loan_to_Appraisedvalu_Percent_meanAD', 96),
             ('UtilitySpending', 94),
             ('CurrentInterestrate_skew', 89),
             ('Current_Loan_to_Appraisedvalu_Percent_mad', 85),
             ('Current_Instalment_Sequence_std', 84),
             ('Current_Loan_to_Appraisedvalu_Percent_median', 83),
             ('payment_z_score', 79),
             ('Current_Loan_to_Appraisedvalu_Percent_skew', 77),
             ('Current_Outstanding_kurtosis', 74),
             ('Current_Loan_to_Appraisedvalu_Percent_std', 74),
             ('Current_Loan_to_Appraisedvalu_Percent_abs_avg', 70),
             ('Current_Instalment_Sequence_skew', 69),
             ('CurrentInterestrate_mean', 68),
             ('Current_Outstanding_meanAD', 67),
             ('Current_Outstanding_mad', 66),
             ('Current_Instalment_Sequence_max', 65),
             ('Current_Instalment_Sequence_min', 65),
             ('Current_Outstanding_range', 60),
             ('CurrentInterestrate_max', 58),
             ('Current_Loan_to_Appraisedvalu_Percent_maxtoMin', 55),
             ('Current_Outstanding_min', 53),
             ('Current_Loan_to_Appraisedvalu_Percent_kurtosis', 52),
             ('GDP_skew', 52),
             ('remaining_outstanding_min', 50),
             ('RealEstate_Current_Inflation_std', 50),
             ('RealEstate_Current_Inflation_meanAD', 46),
             ('Current_Loan_to_Appraisedvalu_Percent_range', 46),
             ('GDP_std', 46),
             ('remaining_outstanding_max', 46),
             ('Starting_Loan_to_Appraisedvalu_Percent', 45),
             ('remaining_outstanding_abs_min', 45),
             ('StartingInterestrate', 45),
             ('CurrentInterestrate_meanAD', 44),
             ('age', 44),
             ('CurrentInterestrate_kurtosis', 43),
             ('UnemploymentRate_skew', 42),
             ('remaining_outstanding_skew', 41),
             ('UnemploymentRate_std', 39),
             ('GDP_meanAD', 37),
             ('CurrentInterestrate_median', 37),
             ('Current_Instalment_Sequence_maxtoMin', 36),
             ('remaining_outstanding_median', 36),
             ('current_Appraisal_value_std', 36),
             ('GDP_kurtosis', 35),
             ('remaining_outstanding_mean', 35),
             ('current_Appraisal_value_mad', 33),
             ('remaining_outstanding_abs_max', 32),
             ('CurrentInterestrate_min', 30),
             ('CurrentInterestrate_std', 29),
             ('CurrentInterestrate_abs_avg', 29),
             ('remaining_outstanding_abs_avg', 28),
             ('current_Appraisal_value_mean', 28),
             ('current_Appraisal_value_range', 28),
             ('RealEstate_Starting_Inflation', 27),
             ('current_Appraisal_value_meanAD', 26),
             ('CurrentInterestrate_mad', 23),
             ('GDP_mad', 23),
             ('Appraisal_value', 23),
             ('current_Appraisal_value_median', 22),
             ('Current_Instalment_Sequence_meanAD', 21),
             ('CurrentInterestrate_maxtoMin', 20),
             ('GDP_mean', 20),
             ('RealEstate_Current_Inflation_kurtosis', 19),
             ('RealEstate_Current_Inflation_skew', 18),
             ('NoOfProperties', 17),
             ('CurrentInterestrate_range', 17),
             ('Current_Outstanding_max', 16),
             ('Urban_Development_Yes', 16),
             ('UnemploymentRate_median', 15),
             ('UnemploymentRate_meanAD', 14),
             ('UnemploymentRate_mean', 13),
             ('UnemploymentRate_kurtosis', 13),
             ('GDP_median', 13),
             ('Current_Instalment_Sequence_kurtosis', 12),
             ('RealEstate_Current_Inflation_mean', 12),
             ('current_Appraisal_value_max', 12),
             ('current_Appraisal_value_min', 11),
             ('eCommerceAccount_1', 11),
             ('Starting_outstanding', 10),
             ('Villa_House_Yes', 10),
             ('RealEstate_Current_Inflation_mad', 9),
             ('SocialMediaAccount_Yes', 9),
             ('Current_Outstanding_abs_avg', 8),
             ('RealEstate_Current_Inflation_median', 8),
             ('GDP_range', 7),
             ('Asset_type_No shred services', 6),
             ('ProfessionalLicensure_1', 6),
             ('Current_Outstanding_median', 6),
             ('Current_Instalment_Sequence_range', 5),
             ('payment_lenght', 5),
             ('GDP_abs_avg', 5),
             ('RealEstate_Current_Inflation_maxtoMin', 4),
             ('RealEstate_Current_Inflation_abs_avg', 4),
             ('RealEstate_Current_Inflation_range', 3),
             ('RealEstate_Current_Inflation_max', 3),
             ('GDP_maxtoMin', 3),
             ('RealEstate_Current_Inflation_min', 3),
             ('Current_Instalment_Sequence_median', 3),
             ('UnemploymentRate_mad', 3),
             ('GDP_max', 2),
             ('Investment_SelfOccupied_Self Occupancy', 2),
             ('UnemploymentRate_range', 2),
             ('current_Appraisal_value_abs_avg', 1)])
In [164]:
most_relevant_features=list(dict((k, v) for k, v in xgb_model4.get_booster().get_fscore().items() if v >= 4).keys())

Build MOdel On Top Imp features

In [165]:
train_x=df[most_relevant_features]
train_y=df['CreditRiskScore']
In [166]:
X_train, X_test, y_train, y_test  = train_test_split(train_x, train_y, test_size = 0.2, random_state = 106)
xgb_model5 = XGBRegressor(n_estimators=100, learning_rate=0.08, gamma=0, subsample=0.75,
                         colsample_bytree=1, max_depth=7, n_jobs=-1)
xgb_model5.fit(X_train, y_train, early_stopping_rounds=5, 
             eval_set=[(X_test, y_test)], verbose=False)
y_train_pred5 = xgb_model5.predict(X_train)
y_pred5 = xgb_model5.predict(X_test)
    

print('Train r2 score: ', r2_score(y_train_pred5, y_train))
print('Test r2 score: ', r2_score(y_test, y_pred5))
train_mse5 = mean_squared_error(y_train_pred5, y_train)
test_mse5 = mean_squared_error(y_pred5, y_test)
train_rmse5 = np.sqrt(train_mse5)
test_rmse5 = np.sqrt(test_mse5)
print('Train RMSE: %.4f' % train_rmse5)
print('Test RMSE: %.4f' % test_rmse5)
Train r2 score:  0.7537542244310707
Test r2 score:  0.7321241708603874
Train RMSE: 0.1070
Test RMSE: 0.1306

Hyperparameter Tuning

In [167]:
xgb1 = XGBRegressor()
parameters = {'nthread':[4], #when use hyperthread, xgboost may become slower
              'objective':['reg:linear'],
              'learning_rate': [0.03, 0.05, 0.08,0.1,0.3], #so called `eta` value
              'max_depth': [5, 6, 7],
              'min_child_weight': [4,6],
              'silent': [1],
              'subsample': [0.3,0.5,0.7,0.9],
              'colsample_bytree': [0.5,0.7,1],
              'n_estimators': [500]}

xgb_grid = RandomizedSearchCV(xgb1,
                        parameters,
                        cv = 5,
                        n_jobs = 5,
                        verbose=True)

xgb_grid.fit(X_train, y_train,early_stopping_rounds=5, 
             eval_set=[(X_test, y_test)])

print(xgb_grid.best_score_)
print(xgb_grid.best_params_)
Fitting 5 folds for each of 10 candidates, totalling 50 fits
[Parallel(n_jobs=5)]: Done  40 tasks      | elapsed:  3.5min
[Parallel(n_jobs=5)]: Done  50 out of  50 | elapsed:  4.1min finished
[0]	validation_0-rmse:5.62273
Will train until validation_0-rmse hasn't improved in 5 rounds.
[1]	validation_0-rmse:5.34216
[2]	validation_0-rmse:5.07551
[3]	validation_0-rmse:4.82234
[4]	validation_0-rmse:4.5818
[5]	validation_0-rmse:4.35329
[6]	validation_0-rmse:4.1362
[7]	validation_0-rmse:3.92988
[8]	validation_0-rmse:3.73399
[9]	validation_0-rmse:3.54784
[10]	validation_0-rmse:3.37105
[11]	validation_0-rmse:3.2031
[12]	validation_0-rmse:3.04352
[13]	validation_0-rmse:2.89198
[14]	validation_0-rmse:2.74803
[15]	validation_0-rmse:2.61138
[16]	validation_0-rmse:2.48137
[17]	validation_0-rmse:2.35793
[18]	validation_0-rmse:2.24076
[19]	validation_0-rmse:2.12935
[20]	validation_0-rmse:2.02358
[21]	validation_0-rmse:1.92301
[22]	validation_0-rmse:1.82752
[23]	validation_0-rmse:1.73691
[24]	validation_0-rmse:1.65078
[25]	validation_0-rmse:1.56894
[26]	validation_0-rmse:1.49121
[27]	validation_0-rmse:1.41743
[28]	validation_0-rmse:1.34736
[29]	validation_0-rmse:1.28081
[30]	validation_0-rmse:1.21764
[31]	validation_0-rmse:1.15768
[32]	validation_0-rmse:1.10065
[33]	validation_0-rmse:1.04654
[34]	validation_0-rmse:0.995178
[35]	validation_0-rmse:0.946432
[36]	validation_0-rmse:0.900197
[37]	validation_0-rmse:0.856266
[38]	validation_0-rmse:0.814549
[39]	validation_0-rmse:0.774974
[40]	validation_0-rmse:0.737453
[41]	validation_0-rmse:0.701806
[42]	validation_0-rmse:0.668025
[43]	validation_0-rmse:0.636038
[44]	validation_0-rmse:0.605677
[45]	validation_0-rmse:0.576856
[46]	validation_0-rmse:0.549549
[47]	validation_0-rmse:0.523749
[48]	validation_0-rmse:0.499227
[49]	validation_0-rmse:0.475991
[50]	validation_0-rmse:0.454108
[51]	validation_0-rmse:0.433332
[52]	validation_0-rmse:0.413623
[53]	validation_0-rmse:0.395033
[54]	validation_0-rmse:0.377519
[55]	validation_0-rmse:0.360877
[56]	validation_0-rmse:0.345216
[57]	validation_0-rmse:0.330443
[58]	validation_0-rmse:0.316516
[59]	validation_0-rmse:0.303337
[60]	validation_0-rmse:0.29107
[61]	validation_0-rmse:0.279496
[62]	validation_0-rmse:0.268581
[63]	validation_0-rmse:0.258269
[64]	validation_0-rmse:0.248582
[65]	validation_0-rmse:0.239595
[66]	validation_0-rmse:0.231119
[67]	validation_0-rmse:0.223248
[68]	validation_0-rmse:0.215811
[69]	validation_0-rmse:0.208949
[70]	validation_0-rmse:0.202513
[71]	validation_0-rmse:0.19653
[72]	validation_0-rmse:0.190941
[73]	validation_0-rmse:0.185778
[74]	validation_0-rmse:0.180998
[75]	validation_0-rmse:0.176604
[76]	validation_0-rmse:0.172504
[77]	validation_0-rmse:0.168742
[78]	validation_0-rmse:0.165232
[79]	validation_0-rmse:0.162035
[80]	validation_0-rmse:0.159156
[81]	validation_0-rmse:0.156456
[82]	validation_0-rmse:0.153948
[83]	validation_0-rmse:0.151678
[84]	validation_0-rmse:0.149568
[85]	validation_0-rmse:0.147675
[86]	validation_0-rmse:0.145927
[87]	validation_0-rmse:0.144241
[88]	validation_0-rmse:0.142835
[89]	validation_0-rmse:0.14145
[90]	validation_0-rmse:0.140238
[91]	validation_0-rmse:0.139164
[92]	validation_0-rmse:0.138185
[93]	validation_0-rmse:0.137226
[94]	validation_0-rmse:0.13645
[95]	validation_0-rmse:0.135765
[96]	validation_0-rmse:0.135086
[97]	validation_0-rmse:0.134498
[98]	validation_0-rmse:0.133979
[99]	validation_0-rmse:0.133474
[100]	validation_0-rmse:0.133015
[101]	validation_0-rmse:0.132565
[102]	validation_0-rmse:0.132164
[103]	validation_0-rmse:0.131799
[104]	validation_0-rmse:0.13144
[105]	validation_0-rmse:0.131096
[106]	validation_0-rmse:0.130859
[107]	validation_0-rmse:0.130673
[108]	validation_0-rmse:0.13047
[109]	validation_0-rmse:0.130232
[110]	validation_0-rmse:0.130055
[111]	validation_0-rmse:0.129907
[112]	validation_0-rmse:0.129789
[113]	validation_0-rmse:0.129684
[114]	validation_0-rmse:0.129541
[115]	validation_0-rmse:0.129448
[116]	validation_0-rmse:0.129343
[117]	validation_0-rmse:0.129286
[118]	validation_0-rmse:0.129217
[119]	validation_0-rmse:0.129222
[120]	validation_0-rmse:0.129161
[121]	validation_0-rmse:0.129137
[122]	validation_0-rmse:0.12911
[123]	validation_0-rmse:0.129111
[124]	validation_0-rmse:0.12903
[125]	validation_0-rmse:0.128977
[126]	validation_0-rmse:0.12898
[127]	validation_0-rmse:0.128949
[128]	validation_0-rmse:0.128957
[129]	validation_0-rmse:0.128955
[130]	validation_0-rmse:0.128943
[131]	validation_0-rmse:0.128977
[132]	validation_0-rmse:0.128933
[133]	validation_0-rmse:0.128925
[134]	validation_0-rmse:0.128958
[135]	validation_0-rmse:0.128906
[136]	validation_0-rmse:0.128866
[137]	validation_0-rmse:0.128875
[138]	validation_0-rmse:0.128856
[139]	validation_0-rmse:0.128814
[140]	validation_0-rmse:0.128812
[141]	validation_0-rmse:0.128821
[142]	validation_0-rmse:0.128785
[143]	validation_0-rmse:0.12877
[144]	validation_0-rmse:0.128801
[145]	validation_0-rmse:0.128753
[146]	validation_0-rmse:0.128745
[147]	validation_0-rmse:0.12878
[148]	validation_0-rmse:0.128805
[149]	validation_0-rmse:0.128789
[150]	validation_0-rmse:0.128788
[151]	validation_0-rmse:0.128774
Stopping. Best iteration:
[146]	validation_0-rmse:0.128745

0.7202181466299385
{'subsample': 0.9, 'silent': 1, 'objective': 'reg:linear', 'nthread': 4, 'n_estimators': 500, 'min_child_weight': 4, 'max_depth': 6, 'learning_rate': 0.05, 'colsample_bytree': 1}
In [169]:
train_x=df[most_relevant_features]
train_y=df['CreditRiskScore']
X_train, X_test, y_train, y_test  = train_test_split(train_x, train_y, test_size = 0.2, random_state = 0)

Build model on best parameters

In [174]:
xgb_model6 = XGBRegressor(subsample= 0.9, silent= 1, 
                          nthread= 4, n_estimators= 500, min_child_weight= 4,
                          max_depth= 6, learning_rate= 0.05, colsample_bytree= 1)
xgb_model6.fit(X_train, y_train, early_stopping_rounds=5, 
             eval_set=[(X_test, y_test)], verbose=False)
y_train_pred6 = xgb_model6.predict(X_train)
y_pred5 = xgb_model6.predict(X_test)

print('Train r2 score: ', r2_score(y_train_pred5, y_train))
print('Test r2 score: ', r2_score(y_test, y_pred5))
train_mse5 = mean_squared_error(y_train_pred5, y_train)
test_mse5 = mean_squared_error(y_pred5, y_test)
train_rmse5 = np.sqrt(train_mse5)
test_rmse5 = np.sqrt(test_mse5)
print('Train RMSE: %.4f' % train_rmse5)
print('Test RMSE: %.4f' % test_rmse5)
Train r2 score:  -1.3685038367793876
Test r2 score:  0.7385527157519085
Train RMSE: 0.3318
Test RMSE: 0.1295


ON LEADERBOARD SCORE- 113.55

4 KNN

In [176]:
#X, y = df.drop(['CreditRiskScore'], axis = 1), df['CreditRiskScore']

#X_train, X_test, y_train, y_test  = train_test_split(X, y, test_size = 0.2, random_state = 0)
y= df1['CreditRiskScore']
X=df1.drop(['CreditRiskScore'], axis = 1)
from sklearn.model_selection import train_test_split  
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)  
In [177]:
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
#sc = StandardScaler()
x_train = sc.fit_transform(X_train)
x_test = sc.transform(X_test)
In [178]:
#import required packages
from sklearn import neighbors
from sklearn.metrics import mean_squared_error 
from math import sqrt
import matplotlib.pyplot as plt
%matplotlib inline

rmse_val = [] #to store rmse values for different k
for K in range(20):
    K = K+1
    model = neighbors.KNeighborsRegressor(n_neighbors = K)

    model.fit(x_train, y_train)  #fit the model
    pred=model.predict(x_test) #make prediction on test set
    error = sqrt(mean_squared_error(y_test,pred)) #calculate rmse
    rmse_val.append(error) #store rmse values
    print('RMSE value for k= ' , K , 'is:', error)

#plotting the rmse values against k values
curve = pd.DataFrame(rmse_val) #elbow curve 
curve.plot()
RMSE value for k=  1 is: 0.22495603156751662
RMSE value for k=  2 is: 0.19429195916963873
RMSE value for k=  3 is: 0.18400493391230277
RMSE value for k=  4 is: 0.1789503463487817
RMSE value for k=  5 is: 0.17748257707524592
RMSE value for k=  6 is: 0.17503836270323894
RMSE value for k=  7 is: 0.1723672192618586
RMSE value for k=  8 is: 0.17198828290621426
RMSE value for k=  9 is: 0.17124013864589804
RMSE value for k=  10 is: 0.17122409587813953
RMSE value for k=  11 is: 0.17094440372250025
RMSE value for k=  12 is: 0.16955128248578555
RMSE value for k=  13 is: 0.16911800175968053
RMSE value for k=  14 is: 0.1688904141492528
RMSE value for k=  15 is: 0.16912915089903818
RMSE value for k=  16 is: 0.16922759240942348
RMSE value for k=  17 is: 0.1689272458509503
RMSE value for k=  18 is: 0.16843956329135681
RMSE value for k=  19 is: 0.1679233857612626
RMSE value for k=  20 is: 0.16806369337244603
Out[178]:
<matplotlib.axes._subplots.AxesSubplot at 0x29e1e3182b0>
In [179]:
model = neighbors.KNeighborsRegressor(n_neighbors = 12)# best k value
model.fit(x_train, y_train)  #fit the model
pred=model.predict(x_test) 
knn_train=model.predict(x_train)
knn_test=model.predict(x_test)

print('Train r2 score: ', r2_score(knn_train, y_train))
print('Test r2 score: ', r2_score(y_test, knn_test))
train_mse5 = mean_squared_error(knn_train, y_train)
test_mse5 = mean_squared_error(knn_test, y_test)
train_rmse5 = np.sqrt(train_mse5)
test_rmse5 = np.sqrt(test_mse5)
print('Train RMSE: %.4f' % train_rmse5)
print('Test RMSE: %.4f' % test_rmse5)
Train r2 score:  0.27622429024986894
Test r2 score:  0.555263460010444
Train RMSE: 0.1565
Test RMSE: 0.1696


ON LEADERBOARD SCORE- 119.63

5 CAT BOOST

In [181]:
#Creating a training set for modeling and validation set to check model performance
train_cat_dt=train_data_1
X = train_cat_dt.drop(['CreditRiskScore'], axis=1)
y = train_cat_dt.CreditRiskScore

from sklearn.model_selection import train_test_split
X_train, X_validation, y_train, y_validation = train_test_split(X, y, train_size=0.7, random_state=1234)
In [182]:
categorical_features_indices = np.where(X_train.dtypes == np.object)[0]
In [183]:
#importing library and building model
from catboost import CatBoostRegressor
model=CatBoostRegressor(iterations=1000, depth=3, learning_rate=0.1, loss_function='RMSE',random_seed=42)
model.fit(X_train, y_train,cat_features=categorical_features_indices,eval_set=(X_validation, y_validation),plot=True)
0:	learn: 5.7773910	test: 5.7813836	best: 5.7813836 (0)	total: 186ms	remaining: 3m 5s
1:	learn: 5.2014118	test: 5.2051371	best: 5.2051371 (1)	total: 224ms	remaining: 1m 51s
2:	learn: 4.6839200	test: 4.6874697	best: 4.6874697 (2)	total: 268ms	remaining: 1m 28s
3:	learn: 4.2183221	test: 4.2218766	best: 4.2218766 (3)	total: 311ms	remaining: 1m 17s
4:	learn: 3.7988923	test: 3.8022856	best: 3.8022856 (4)	total: 350ms	remaining: 1m 9s
5:	learn: 3.4214286	test: 3.4247014	best: 3.4247014 (5)	total: 392ms	remaining: 1m 4s
6:	learn: 3.0814898	test: 3.0846036	best: 3.0846036 (6)	total: 432ms	remaining: 1m 1s
7:	learn: 2.7758050	test: 2.7787368	best: 2.7787368 (7)	total: 472ms	remaining: 58.5s
8:	learn: 2.5009178	test: 2.5040005	best: 2.5040005 (8)	total: 509ms	remaining: 56.1s
9:	learn: 2.2537551	test: 2.2567877	best: 2.2567877 (9)	total: 552ms	remaining: 54.7s
10:	learn: 2.0313703	test: 2.0343848	best: 2.0343848 (10)	total: 594ms	remaining: 53.4s
11:	learn: 1.8310595	test: 1.8339357	best: 1.8339357 (11)	total: 645ms	remaining: 53.1s
12:	learn: 1.6509995	test: 1.6535912	best: 1.6535912 (12)	total: 714ms	remaining: 54.2s
13:	learn: 1.4893882	test: 1.4919209	best: 1.4919209 (13)	total: 793ms	remaining: 55.9s
14:	learn: 1.3432789	test: 1.3458081	best: 1.3458081 (14)	total: 840ms	remaining: 55.2s
15:	learn: 1.2128168	test: 1.2153963	best: 1.2153963 (15)	total: 885ms	remaining: 54.4s
16:	learn: 1.0957346	test: 1.0982615	best: 1.0982615 (16)	total: 931ms	remaining: 53.8s
17:	learn: 0.9907887	test: 0.9931902	best: 0.9931902 (17)	total: 969ms	remaining: 52.9s
18:	learn: 0.8963048	test: 0.8986038	best: 0.8986038 (18)	total: 1.01s	remaining: 52.3s
19:	learn: 0.8113423	test: 0.8136687	best: 0.8136687 (19)	total: 1.05s	remaining: 51.4s
20:	learn: 0.7358981	test: 0.7380569	best: 0.7380569 (20)	total: 1.09s	remaining: 50.7s
21:	learn: 0.6680995	test: 0.6700729	best: 0.6700729 (21)	total: 1.13s	remaining: 50.2s
22:	learn: 0.6075144	test: 0.6095085	best: 0.6095085 (22)	total: 1.17s	remaining: 49.8s
23:	learn: 0.5533291	test: 0.5551448	best: 0.5551448 (23)	total: 1.22s	remaining: 49.5s
24:	learn: 0.5040360	test: 0.5057241	best: 0.5057241 (24)	total: 1.25s	remaining: 48.9s
25:	learn: 0.4610784	test: 0.4625633	best: 0.4625633 (25)	total: 1.3s	remaining: 48.6s
26:	learn: 0.4226982	test: 0.4238991	best: 0.4238991 (26)	total: 1.34s	remaining: 48.3s
27:	learn: 0.3889719	test: 0.3897938	best: 0.3897938 (27)	total: 1.38s	remaining: 47.9s
28:	learn: 0.3584026	test: 0.3590125	best: 0.3590125 (28)	total: 1.43s	remaining: 47.8s
29:	learn: 0.3322606	test: 0.3325270	best: 0.3325270 (29)	total: 1.47s	remaining: 47.4s
30:	learn: 0.3080209	test: 0.3081156	best: 0.3081156 (30)	total: 1.51s	remaining: 47.2s
31:	learn: 0.2874528	test: 0.2873457	best: 0.2873457 (31)	total: 1.55s	remaining: 46.9s
32:	learn: 0.2688908	test: 0.2685173	best: 0.2685173 (32)	total: 1.59s	remaining: 46.7s
33:	learn: 0.2529728	test: 0.2523210	best: 0.2523210 (33)	total: 1.64s	remaining: 46.6s
34:	learn: 0.2393157	test: 0.2383898	best: 0.2383898 (34)	total: 1.68s	remaining: 46.3s
35:	learn: 0.2267054	test: 0.2254976	best: 0.2254976 (35)	total: 1.73s	remaining: 46.2s
36:	learn: 0.2168932	test: 0.2153561	best: 0.2153561 (36)	total: 1.76s	remaining: 46s
37:	learn: 0.2079134	test: 0.2060616	best: 0.2060616 (37)	total: 1.81s	remaining: 45.8s
38:	learn: 0.2006981	test: 0.1985370	best: 0.1985370 (38)	total: 1.85s	remaining: 45.5s
39:	learn: 0.1936584	test: 0.1911908	best: 0.1911908 (39)	total: 1.89s	remaining: 45.3s
40:	learn: 0.1875123	test: 0.1850041	best: 0.1850041 (40)	total: 1.93s	remaining: 45.1s
41:	learn: 0.1828081	test: 0.1800893	best: 0.1800893 (41)	total: 1.97s	remaining: 45s
42:	learn: 0.1786665	test: 0.1759453	best: 0.1759453 (42)	total: 2.01s	remaining: 44.8s
43:	learn: 0.1754396	test: 0.1725272	best: 0.1725272 (43)	total: 2.05s	remaining: 44.6s
44:	learn: 0.1726145	test: 0.1695493	best: 0.1695493 (44)	total: 2.1s	remaining: 44.5s
45:	learn: 0.1703392	test: 0.1671276	best: 0.1671276 (45)	total: 2.13s	remaining: 44.3s
46:	learn: 0.1683680	test: 0.1650160	best: 0.1650160 (46)	total: 2.18s	remaining: 44.1s
47:	learn: 0.1667710	test: 0.1633361	best: 0.1633361 (47)	total: 2.21s	remaining: 43.9s
48:	learn: 0.1651196	test: 0.1616270	best: 0.1616270 (48)	total: 2.26s	remaining: 43.8s
49:	learn: 0.1640935	test: 0.1605264	best: 0.1605264 (49)	total: 2.3s	remaining: 43.7s
50:	learn: 0.1630825	test: 0.1594450	best: 0.1594450 (50)	total: 2.34s	remaining: 43.5s
51:	learn: 0.1622644	test: 0.1586174	best: 0.1586174 (51)	total: 2.38s	remaining: 43.3s
52:	learn: 0.1615359	test: 0.1578398	best: 0.1578398 (52)	total: 2.42s	remaining: 43.2s
53:	learn: 0.1604708	test: 0.1567495	best: 0.1567495 (53)	total: 2.46s	remaining: 43.1s
54:	learn: 0.1600289	test: 0.1563016	best: 0.1563016 (54)	total: 2.5s	remaining: 42.9s
55:	learn: 0.1592861	test: 0.1555942	best: 0.1555942 (55)	total: 2.55s	remaining: 42.9s
56:	learn: 0.1584744	test: 0.1547972	best: 0.1547972 (56)	total: 2.59s	remaining: 42.8s
57:	learn: 0.1579716	test: 0.1543486	best: 0.1543486 (57)	total: 2.63s	remaining: 42.7s
58:	learn: 0.1571360	test: 0.1535487	best: 0.1535487 (58)	total: 2.67s	remaining: 42.5s
59:	learn: 0.1566270	test: 0.1530833	best: 0.1530833 (59)	total: 2.71s	remaining: 42.4s
60:	learn: 0.1564060	test: 0.1528123	best: 0.1528123 (60)	total: 2.74s	remaining: 42.2s
61:	learn: 0.1561311	test: 0.1525510	best: 0.1525510 (61)	total: 2.79s	remaining: 42.2s
62:	learn: 0.1558326	test: 0.1522365	best: 0.1522365 (62)	total: 2.83s	remaining: 42s
63:	learn: 0.1555128	test: 0.1519412	best: 0.1519412 (63)	total: 2.87s	remaining: 41.9s
64:	learn: 0.1552000	test: 0.1516396	best: 0.1516396 (64)	total: 2.9s	remaining: 41.8s
65:	learn: 0.1550261	test: 0.1514595	best: 0.1514595 (65)	total: 2.94s	remaining: 41.7s
66:	learn: 0.1547958	test: 0.1512729	best: 0.1512729 (66)	total: 2.98s	remaining: 41.5s
67:	learn: 0.1545227	test: 0.1510669	best: 0.1510669 (67)	total: 3.02s	remaining: 41.4s
68:	learn: 0.1541459	test: 0.1507636	best: 0.1507636 (68)	total: 3.07s	remaining: 41.4s
69:	learn: 0.1539272	test: 0.1505625	best: 0.1505625 (69)	total: 3.1s	remaining: 41.3s
70:	learn: 0.1538666	test: 0.1505212	best: 0.1505212 (70)	total: 3.15s	remaining: 41.2s
71:	learn: 0.1538201	test: 0.1504677	best: 0.1504677 (71)	total: 3.19s	remaining: 41.1s
72:	learn: 0.1531812	test: 0.1497576	best: 0.1497576 (72)	total: 3.23s	remaining: 41s
73:	learn: 0.1530055	test: 0.1495956	best: 0.1495956 (73)	total: 3.28s	remaining: 41s
74:	learn: 0.1527883	test: 0.1494033	best: 0.1494033 (74)	total: 3.32s	remaining: 40.9s
75:	learn: 0.1525933	test: 0.1492108	best: 0.1492108 (75)	total: 3.35s	remaining: 40.8s
76:	learn: 0.1524934	test: 0.1491172	best: 0.1491172 (76)	total: 3.39s	remaining: 40.6s
77:	learn: 0.1523098	test: 0.1489744	best: 0.1489744 (77)	total: 3.43s	remaining: 40.6s
78:	learn: 0.1522330	test: 0.1488927	best: 0.1488927 (78)	total: 3.48s	remaining: 40.5s
79:	learn: 0.1522283	test: 0.1488841	best: 0.1488841 (79)	total: 3.52s	remaining: 40.4s
80:	learn: 0.1518708	test: 0.1485177	best: 0.1485177 (80)	total: 3.57s	remaining: 40.5s
81:	learn: 0.1518505	test: 0.1484953	best: 0.1484953 (81)	total: 3.61s	remaining: 40.4s
82:	learn: 0.1518320	test: 0.1484681	best: 0.1484681 (82)	total: 3.65s	remaining: 40.3s
83:	learn: 0.1516820	test: 0.1483563	best: 0.1483563 (83)	total: 3.7s	remaining: 40.3s
84:	learn: 0.1516382	test: 0.1483345	best: 0.1483345 (84)	total: 3.73s	remaining: 40.2s
85:	learn: 0.1515460	test: 0.1482914	best: 0.1482914 (85)	total: 3.77s	remaining: 40.1s
86:	learn: 0.1515194	test: 0.1482827	best: 0.1482827 (86)	total: 3.82s	remaining: 40.1s
87:	learn: 0.1514179	test: 0.1482104	best: 0.1482104 (87)	total: 3.86s	remaining: 40s
88:	learn: 0.1512249	test: 0.1480337	best: 0.1480337 (88)	total: 3.9s	remaining: 39.9s
89:	learn: 0.1511143	test: 0.1479343	best: 0.1479343 (89)	total: 3.94s	remaining: 39.8s
90:	learn: 0.1508281	test: 0.1477386	best: 0.1477386 (90)	total: 3.98s	remaining: 39.8s
91:	learn: 0.1507222	test: 0.1476732	best: 0.1476732 (91)	total: 4.02s	remaining: 39.7s
92:	learn: 0.1505818	test: 0.1475787	best: 0.1475787 (92)	total: 4.06s	remaining: 39.6s
93:	learn: 0.1505399	test: 0.1475384	best: 0.1475384 (93)	total: 4.11s	remaining: 39.6s
94:	learn: 0.1503704	test: 0.1473632	best: 0.1473632 (94)	total: 4.14s	remaining: 39.5s
95:	learn: 0.1500211	test: 0.1470127	best: 0.1470127 (95)	total: 4.19s	remaining: 39.4s
96:	learn: 0.1499641	test: 0.1469873	best: 0.1469873 (96)	total: 4.23s	remaining: 39.4s
97:	learn: 0.1495086	test: 0.1465410	best: 0.1465410 (97)	total: 4.27s	remaining: 39.3s
98:	learn: 0.1495085	test: 0.1465401	best: 0.1465401 (98)	total: 4.3s	remaining: 39.2s
99:	learn: 0.1494716	test: 0.1465185	best: 0.1465185 (99)	total: 4.34s	remaining: 39.1s
100:	learn: 0.1488589	test: 0.1458145	best: 0.1458145 (100)	total: 4.38s	remaining: 39s
101:	learn: 0.1486637	test: 0.1457163	best: 0.1457163 (101)	total: 4.42s	remaining: 39s
102:	learn: 0.1484465	test: 0.1455400	best: 0.1455400 (102)	total: 4.46s	remaining: 38.9s
103:	learn: 0.1482810	test: 0.1454164	best: 0.1454164 (103)	total: 4.51s	remaining: 38.9s
104:	learn: 0.1482570	test: 0.1454072	best: 0.1454072 (104)	total: 4.56s	remaining: 38.9s
105:	learn: 0.1481877	test: 0.1453542	best: 0.1453542 (105)	total: 4.6s	remaining: 38.8s
106:	learn: 0.1481187	test: 0.1452605	best: 0.1452605 (106)	total: 4.64s	remaining: 38.7s
107:	learn: 0.1479342	test: 0.1451397	best: 0.1451397 (107)	total: 4.68s	remaining: 38.7s
108:	learn: 0.1478800	test: 0.1450915	best: 0.1450915 (108)	total: 4.72s	remaining: 38.6s
109:	learn: 0.1477488	test: 0.1449839	best: 0.1449839 (109)	total: 4.77s	remaining: 38.6s
110:	learn: 0.1476827	test: 0.1449459	best: 0.1449459 (110)	total: 4.81s	remaining: 38.5s
111:	learn: 0.1476366	test: 0.1449314	best: 0.1449314 (111)	total: 4.84s	remaining: 38.4s
112:	learn: 0.1476031	test: 0.1449264	best: 0.1449264 (112)	total: 4.88s	remaining: 38.3s
113:	learn: 0.1475673	test: 0.1449230	best: 0.1449230 (113)	total: 4.92s	remaining: 38.3s
114:	learn: 0.1475422	test: 0.1449120	best: 0.1449120 (114)	total: 4.97s	remaining: 38.2s
115:	learn: 0.1474746	test: 0.1448615	best: 0.1448615 (115)	total: 5s	remaining: 38.1s
116:	learn: 0.1474645	test: 0.1448565	best: 0.1448565 (116)	total: 5.04s	remaining: 38.1s
117:	learn: 0.1474621	test: 0.1448527	best: 0.1448527 (117)	total: 5.09s	remaining: 38s
118:	learn: 0.1474340	test: 0.1448420	best: 0.1448420 (118)	total: 5.13s	remaining: 38s
119:	learn: 0.1473425	test: 0.1447521	best: 0.1447521 (119)	total: 5.17s	remaining: 37.9s
120:	learn: 0.1473412	test: 0.1447526	best: 0.1447521 (119)	total: 5.21s	remaining: 37.8s
121:	learn: 0.1471799	test: 0.1445838	best: 0.1445838 (121)	total: 5.25s	remaining: 37.8s
122:	learn: 0.1471656	test: 0.1445733	best: 0.1445733 (122)	total: 5.29s	remaining: 37.7s
123:	learn: 0.1470246	test: 0.1444900	best: 0.1444900 (123)	total: 5.34s	remaining: 37.7s
124:	learn: 0.1470034	test: 0.1444717	best: 0.1444717 (124)	total: 5.38s	remaining: 37.6s
125:	learn: 0.1470022	test: 0.1444757	best: 0.1444717 (124)	total: 5.42s	remaining: 37.6s
126:	learn: 0.1470013	test: 0.1444739	best: 0.1444717 (124)	total: 5.46s	remaining: 37.5s
127:	learn: 0.1468898	test: 0.1443601	best: 0.1443601 (127)	total: 5.51s	remaining: 37.6s
128:	learn: 0.1468558	test: 0.1443454	best: 0.1443454 (128)	total: 5.55s	remaining: 37.5s
129:	learn: 0.1466449	test: 0.1442289	best: 0.1442289 (129)	total: 5.61s	remaining: 37.5s
130:	learn: 0.1465218	test: 0.1441161	best: 0.1441161 (130)	total: 5.65s	remaining: 37.5s
131:	learn: 0.1460906	test: 0.1436553	best: 0.1436553 (131)	total: 5.7s	remaining: 37.5s
132:	learn: 0.1460160	test: 0.1436101	best: 0.1436101 (132)	total: 5.74s	remaining: 37.4s
133:	learn: 0.1460098	test: 0.1436046	best: 0.1436046 (133)	total: 5.78s	remaining: 37.4s
134:	learn: 0.1458171	test: 0.1434498	best: 0.1434498 (134)	total: 5.83s	remaining: 37.3s
135:	learn: 0.1456284	test: 0.1432977	best: 0.1432977 (135)	total: 5.87s	remaining: 37.3s
136:	learn: 0.1455158	test: 0.1432461	best: 0.1432461 (136)	total: 5.91s	remaining: 37.2s
137:	learn: 0.1453215	test: 0.1430601	best: 0.1430601 (137)	total: 5.95s	remaining: 37.2s
138:	learn: 0.1453095	test: 0.1430480	best: 0.1430480 (138)	total: 5.99s	remaining: 37.1s
139:	learn: 0.1451265	test: 0.1429634	best: 0.1429634 (139)	total: 6.03s	remaining: 37s
140:	learn: 0.1450700	test: 0.1429486	best: 0.1429486 (140)	total: 6.07s	remaining: 37s
141:	learn: 0.1450667	test: 0.1429474	best: 0.1429474 (141)	total: 6.12s	remaining: 37s
142:	learn: 0.1450611	test: 0.1429477	best: 0.1429474 (141)	total: 6.16s	remaining: 36.9s
143:	learn: 0.1450346	test: 0.1429330	best: 0.1429330 (143)	total: 6.2s	remaining: 36.8s
144:	learn: 0.1449933	test: 0.1429156	best: 0.1429156 (144)	total: 6.24s	remaining: 36.8s
145:	learn: 0.1449613	test: 0.1429004	best: 0.1429004 (145)	total: 6.28s	remaining: 36.7s
146:	learn: 0.1448594	test: 0.1427814	best: 0.1427814 (146)	total: 6.31s	remaining: 36.6s
147:	learn: 0.1448318	test: 0.1427812	best: 0.1427812 (147)	total: 6.35s	remaining: 36.5s
148:	learn: 0.1448290	test: 0.1427789	best: 0.1427789 (148)	total: 6.38s	remaining: 36.5s
149:	learn: 0.1447027	test: 0.1427111	best: 0.1427111 (149)	total: 6.43s	remaining: 36.4s
150:	learn: 0.1446554	test: 0.1426834	best: 0.1426834 (150)	total: 6.5s	remaining: 36.6s
151:	learn: 0.1445479	test: 0.1426397	best: 0.1426397 (151)	total: 6.58s	remaining: 36.7s
152:	learn: 0.1444569	test: 0.1425447	best: 0.1425447 (152)	total: 6.62s	remaining: 36.6s
153:	learn: 0.1443987	test: 0.1425452	best: 0.1425447 (152)	total: 6.66s	remaining: 36.6s
154:	learn: 0.1443764	test: 0.1425366	best: 0.1425366 (154)	total: 6.7s	remaining: 36.5s
155:	learn: 0.1442528	test: 0.1423933	best: 0.1423933 (155)	total: 6.74s	remaining: 36.5s
156:	learn: 0.1442128	test: 0.1423566	best: 0.1423566 (156)	total: 6.78s	remaining: 36.4s
157:	learn: 0.1442013	test: 0.1423415	best: 0.1423415 (157)	total: 6.82s	remaining: 36.3s
158:	learn: 0.1441910	test: 0.1423327	best: 0.1423327 (158)	total: 6.86s	remaining: 36.3s
159:	learn: 0.1441796	test: 0.1423259	best: 0.1423259 (159)	total: 6.89s	remaining: 36.2s
160:	learn: 0.1441718	test: 0.1423222	best: 0.1423222 (160)	total: 6.93s	remaining: 36.1s
161:	learn: 0.1441688	test: 0.1423259	best: 0.1423222 (160)	total: 6.97s	remaining: 36.1s
162:	learn: 0.1441581	test: 0.1423136	best: 0.1423136 (162)	total: 7.01s	remaining: 36s
163:	learn: 0.1441555	test: 0.1423129	best: 0.1423129 (163)	total: 7.05s	remaining: 35.9s
164:	learn: 0.1441319	test: 0.1423227	best: 0.1423129 (163)	total: 7.1s	remaining: 35.9s
165:	learn: 0.1441289	test: 0.1423227	best: 0.1423129 (163)	total: 7.14s	remaining: 35.9s
166:	learn: 0.1441245	test: 0.1423219	best: 0.1423129 (163)	total: 7.18s	remaining: 35.8s
167:	learn: 0.1441073	test: 0.1423245	best: 0.1423129 (163)	total: 7.22s	remaining: 35.7s
168:	learn: 0.1441050	test: 0.1423196	best: 0.1423129 (163)	total: 7.25s	remaining: 35.7s
169:	learn: 0.1440854	test: 0.1423065	best: 0.1423065 (169)	total: 7.29s	remaining: 35.6s
170:	learn: 0.1440777	test: 0.1422973	best: 0.1422973 (170)	total: 7.33s	remaining: 35.5s
171:	learn: 0.1438702	test: 0.1420606	best: 0.1420606 (171)	total: 7.37s	remaining: 35.5s
172:	learn: 0.1437627	test: 0.1420087	best: 0.1420087 (172)	total: 7.42s	remaining: 35.5s
173:	learn: 0.1437255	test: 0.1419724	best: 0.1419724 (173)	total: 7.46s	remaining: 35.4s
174:	learn: 0.1434729	test: 0.1416953	best: 0.1416953 (174)	total: 7.5s	remaining: 35.4s
175:	learn: 0.1434510	test: 0.1416956	best: 0.1416953 (174)	total: 7.55s	remaining: 35.4s
176:	learn: 0.1434420	test: 0.1416881	best: 0.1416881 (176)	total: 7.6s	remaining: 35.4s
177:	learn: 0.1433481	test: 0.1416379	best: 0.1416379 (177)	total: 7.64s	remaining: 35.3s
178:	learn: 0.1433392	test: 0.1416305	best: 0.1416305 (178)	total: 7.68s	remaining: 35.2s
179:	learn: 0.1433243	test: 0.1416342	best: 0.1416305 (178)	total: 7.72s	remaining: 35.2s
180:	learn: 0.1432884	test: 0.1416370	best: 0.1416305 (178)	total: 7.76s	remaining: 35.1s
181:	learn: 0.1432823	test: 0.1416291	best: 0.1416291 (181)	total: 7.79s	remaining: 35s
182:	learn: 0.1432812	test: 0.1416292	best: 0.1416291 (181)	total: 7.83s	remaining: 35s
183:	learn: 0.1432807	test: 0.1416282	best: 0.1416282 (183)	total: 7.87s	remaining: 34.9s
184:	learn: 0.1432801	test: 0.1416290	best: 0.1416282 (183)	total: 7.91s	remaining: 34.9s
185:	learn: 0.1432000	test: 0.1415431	best: 0.1415431 (185)	total: 7.95s	remaining: 34.8s
186:	learn: 0.1431865	test: 0.1415552	best: 0.1415431 (185)	total: 7.99s	remaining: 34.7s
187:	learn: 0.1431837	test: 0.1415554	best: 0.1415431 (185)	total: 8.04s	remaining: 34.7s
188:	learn: 0.1431452	test: 0.1415399	best: 0.1415399 (188)	total: 8.08s	remaining: 34.7s
189:	learn: 0.1429363	test: 0.1413129	best: 0.1413129 (189)	total: 8.12s	remaining: 34.6s
190:	learn: 0.1429257	test: 0.1413222	best: 0.1413129 (189)	total: 8.16s	remaining: 34.6s
191:	learn: 0.1428494	test: 0.1413222	best: 0.1413129 (189)	total: 8.2s	remaining: 34.5s
192:	learn: 0.1428440	test: 0.1413234	best: 0.1413129 (189)	total: 8.24s	remaining: 34.4s
193:	learn: 0.1428153	test: 0.1412925	best: 0.1412925 (193)	total: 8.28s	remaining: 34.4s
194:	learn: 0.1428130	test: 0.1412908	best: 0.1412908 (194)	total: 8.32s	remaining: 34.3s
195:	learn: 0.1428068	test: 0.1412926	best: 0.1412908 (194)	total: 8.36s	remaining: 34.3s
196:	learn: 0.1427890	test: 0.1412798	best: 0.1412798 (196)	total: 8.4s	remaining: 34.2s
197:	learn: 0.1427181	test: 0.1412369	best: 0.1412369 (197)	total: 8.44s	remaining: 34.2s
198:	learn: 0.1427166	test: 0.1412363	best: 0.1412363 (198)	total: 8.48s	remaining: 34.1s
199:	learn: 0.1426471	test: 0.1411337	best: 0.1411337 (199)	total: 8.53s	remaining: 34.1s
200:	learn: 0.1426431	test: 0.1411340	best: 0.1411337 (199)	total: 8.57s	remaining: 34.1s
201:	learn: 0.1426427	test: 0.1411347	best: 0.1411337 (199)	total: 8.6s	remaining: 34s
202:	learn: 0.1426399	test: 0.1411354	best: 0.1411337 (199)	total: 8.64s	remaining: 33.9s
203:	learn: 0.1426399	test: 0.1411353	best: 0.1411337 (199)	total: 8.69s	remaining: 33.9s
204:	learn: 0.1426207	test: 0.1411367	best: 0.1411337 (199)	total: 8.73s	remaining: 33.8s
205:	learn: 0.1426106	test: 0.1411377	best: 0.1411337 (199)	total: 8.76s	remaining: 33.8s
206:	learn: 0.1426103	test: 0.1411378	best: 0.1411337 (199)	total: 8.8s	remaining: 33.7s
207:	learn: 0.1426021	test: 0.1411253	best: 0.1411253 (207)	total: 8.84s	remaining: 33.7s
208:	learn: 0.1425358	test: 0.1411235	best: 0.1411235 (208)	total: 8.88s	remaining: 33.6s
209:	learn: 0.1425316	test: 0.1411191	best: 0.1411191 (209)	total: 8.92s	remaining: 33.5s
210:	learn: 0.1425162	test: 0.1411242	best: 0.1411191 (209)	total: 8.96s	remaining: 33.5s
211:	learn: 0.1425006	test: 0.1411269	best: 0.1411191 (209)	total: 9.01s	remaining: 33.5s
212:	learn: 0.1424189	test: 0.1410891	best: 0.1410891 (212)	total: 9.05s	remaining: 33.5s
213:	learn: 0.1423877	test: 0.1410746	best: 0.1410746 (213)	total: 9.09s	remaining: 33.4s
214:	learn: 0.1423827	test: 0.1410764	best: 0.1410746 (213)	total: 9.13s	remaining: 33.3s
215:	learn: 0.1423824	test: 0.1410757	best: 0.1410746 (213)	total: 9.17s	remaining: 33.3s
216:	learn: 0.1423470	test: 0.1410585	best: 0.1410585 (216)	total: 9.21s	remaining: 33.2s
217:	learn: 0.1423181	test: 0.1410401	best: 0.1410401 (217)	total: 9.25s	remaining: 33.2s
218:	learn: 0.1418765	test: 0.1406597	best: 0.1406597 (218)	total: 9.29s	remaining: 33.1s
219:	learn: 0.1418277	test: 0.1406200	best: 0.1406200 (219)	total: 9.34s	remaining: 33.1s
220:	learn: 0.1418118	test: 0.1406236	best: 0.1406200 (219)	total: 9.38s	remaining: 33.1s
221:	learn: 0.1417002	test: 0.1405977	best: 0.1405977 (221)	total: 9.42s	remaining: 33s
222:	learn: 0.1416237	test: 0.1405410	best: 0.1405410 (222)	total: 9.46s	remaining: 33s
223:	learn: 0.1415992	test: 0.1405446	best: 0.1405410 (222)	total: 9.5s	remaining: 32.9s
224:	learn: 0.1415903	test: 0.1405406	best: 0.1405406 (224)	total: 9.56s	remaining: 32.9s
225:	learn: 0.1415369	test: 0.1405202	best: 0.1405202 (225)	total: 9.6s	remaining: 32.9s
226:	learn: 0.1415021	test: 0.1404955	best: 0.1404955 (226)	total: 9.64s	remaining: 32.8s
227:	learn: 0.1415020	test: 0.1404955	best: 0.1404955 (226)	total: 9.68s	remaining: 32.8s
228:	learn: 0.1414250	test: 0.1404661	best: 0.1404661 (228)	total: 9.72s	remaining: 32.7s
229:	learn: 0.1414223	test: 0.1404706	best: 0.1404661 (228)	total: 9.76s	remaining: 32.7s
230:	learn: 0.1414076	test: 0.1404705	best: 0.1404661 (228)	total: 9.8s	remaining: 32.6s
231:	learn: 0.1413896	test: 0.1404640	best: 0.1404640 (231)	total: 9.85s	remaining: 32.6s
232:	learn: 0.1413876	test: 0.1404638	best: 0.1404638 (232)	total: 9.89s	remaining: 32.6s
233:	learn: 0.1413618	test: 0.1404700	best: 0.1404638 (232)	total: 9.93s	remaining: 32.5s
234:	learn: 0.1413600	test: 0.1404657	best: 0.1404638 (232)	total: 9.97s	remaining: 32.5s
235:	learn: 0.1412729	test: 0.1403800	best: 0.1403800 (235)	total: 10s	remaining: 32.4s
236:	learn: 0.1412728	test: 0.1403805	best: 0.1403800 (235)	total: 10s	remaining: 32.3s
237:	learn: 0.1412700	test: 0.1403811	best: 0.1403800 (235)	total: 10.1s	remaining: 32.3s
238:	learn: 0.1412249	test: 0.1403634	best: 0.1403634 (238)	total: 10.1s	remaining: 32.2s
239:	learn: 0.1412101	test: 0.1403597	best: 0.1403597 (239)	total: 10.2s	remaining: 32.2s
240:	learn: 0.1411565	test: 0.1403239	best: 0.1403239 (240)	total: 10.2s	remaining: 32.1s
241:	learn: 0.1411201	test: 0.1403230	best: 0.1403230 (241)	total: 10.2s	remaining: 32.1s
242:	learn: 0.1411145	test: 0.1403231	best: 0.1403230 (241)	total: 10.3s	remaining: 32s
243:	learn: 0.1411029	test: 0.1403283	best: 0.1403230 (241)	total: 10.3s	remaining: 32s
244:	learn: 0.1411023	test: 0.1403290	best: 0.1403230 (241)	total: 10.4s	remaining: 31.9s
245:	learn: 0.1410766	test: 0.1403077	best: 0.1403077 (245)	total: 10.4s	remaining: 31.8s
246:	learn: 0.1410691	test: 0.1403149	best: 0.1403077 (245)	total: 10.4s	remaining: 31.8s
247:	learn: 0.1409489	test: 0.1402558	best: 0.1402558 (247)	total: 10.5s	remaining: 31.7s
248:	learn: 0.1409344	test: 0.1402539	best: 0.1402539 (248)	total: 10.5s	remaining: 31.7s
249:	learn: 0.1409241	test: 0.1402450	best: 0.1402450 (249)	total: 10.6s	remaining: 31.7s
250:	learn: 0.1409229	test: 0.1402448	best: 0.1402448 (250)	total: 10.6s	remaining: 31.7s
251:	learn: 0.1409165	test: 0.1402472	best: 0.1402448 (250)	total: 10.7s	remaining: 31.7s
252:	learn: 0.1409150	test: 0.1402503	best: 0.1402448 (250)	total: 10.7s	remaining: 31.6s
253:	learn: 0.1409126	test: 0.1402541	best: 0.1402448 (250)	total: 10.7s	remaining: 31.5s
254:	learn: 0.1408419	test: 0.1401851	best: 0.1401851 (254)	total: 10.8s	remaining: 31.5s
255:	learn: 0.1408398	test: 0.1401860	best: 0.1401851 (254)	total: 10.8s	remaining: 31.5s
256:	learn: 0.1408366	test: 0.1401855	best: 0.1401851 (254)	total: 10.9s	remaining: 31.4s
257:	learn: 0.1408282	test: 0.1401863	best: 0.1401851 (254)	total: 10.9s	remaining: 31.3s
258:	learn: 0.1407658	test: 0.1401660	best: 0.1401660 (258)	total: 10.9s	remaining: 31.3s
259:	learn: 0.1407607	test: 0.1401659	best: 0.1401659 (259)	total: 11s	remaining: 31.2s
260:	learn: 0.1406686	test: 0.1401085	best: 0.1401085 (260)	total: 11s	remaining: 31.2s
261:	learn: 0.1405604	test: 0.1399765	best: 0.1399765 (261)	total: 11.1s	remaining: 31.1s
262:	learn: 0.1405438	test: 0.1399800	best: 0.1399765 (261)	total: 11.1s	remaining: 31.1s
263:	learn: 0.1405358	test: 0.1399693	best: 0.1399693 (263)	total: 11.1s	remaining: 31s
264:	learn: 0.1404986	test: 0.1399235	best: 0.1399235 (264)	total: 11.2s	remaining: 31s
265:	learn: 0.1404984	test: 0.1399235	best: 0.1399235 (264)	total: 11.2s	remaining: 31s
266:	learn: 0.1403587	test: 0.1398603	best: 0.1398603 (266)	total: 11.3s	remaining: 30.9s
267:	learn: 0.1403582	test: 0.1398588	best: 0.1398588 (267)	total: 11.3s	remaining: 30.9s
268:	learn: 0.1403096	test: 0.1398110	best: 0.1398110 (268)	total: 11.3s	remaining: 30.8s
269:	learn: 0.1402877	test: 0.1397880	best: 0.1397880 (269)	total: 11.4s	remaining: 30.8s
270:	learn: 0.1402829	test: 0.1397879	best: 0.1397879 (270)	total: 11.4s	remaining: 30.7s
271:	learn: 0.1402778	test: 0.1397837	best: 0.1397837 (271)	total: 11.5s	remaining: 30.7s
272:	learn: 0.1402646	test: 0.1397899	best: 0.1397837 (271)	total: 11.5s	remaining: 30.6s
273:	learn: 0.1402150	test: 0.1397716	best: 0.1397716 (273)	total: 11.6s	remaining: 30.6s
274:	learn: 0.1402120	test: 0.1397713	best: 0.1397713 (274)	total: 11.6s	remaining: 30.6s
275:	learn: 0.1402003	test: 0.1397693	best: 0.1397693 (275)	total: 11.7s	remaining: 30.6s
276:	learn: 0.1401918	test: 0.1397674	best: 0.1397674 (276)	total: 11.7s	remaining: 30.5s
277:	learn: 0.1401422	test: 0.1397206	best: 0.1397206 (277)	total: 11.8s	remaining: 30.5s
278:	learn: 0.1401147	test: 0.1396977	best: 0.1396977 (278)	total: 11.8s	remaining: 30.5s
279:	learn: 0.1400381	test: 0.1396969	best: 0.1396969 (279)	total: 11.9s	remaining: 30.5s
280:	learn: 0.1399796	test: 0.1396379	best: 0.1396379 (280)	total: 11.9s	remaining: 30.5s
281:	learn: 0.1399769	test: 0.1396406	best: 0.1396379 (280)	total: 11.9s	remaining: 30.4s
282:	learn: 0.1399754	test: 0.1396435	best: 0.1396379 (280)	total: 12s	remaining: 30.4s
283:	learn: 0.1399688	test: 0.1396374	best: 0.1396374 (283)	total: 12s	remaining: 30.3s
284:	learn: 0.1399664	test: 0.1396431	best: 0.1396374 (283)	total: 12.1s	remaining: 30.3s
285:	learn: 0.1399324	test: 0.1396312	best: 0.1396312 (285)	total: 12.1s	remaining: 30.2s
286:	learn: 0.1399044	test: 0.1396139	best: 0.1396139 (286)	total: 12.1s	remaining: 30.2s
287:	learn: 0.1398214	test: 0.1395742	best: 0.1395742 (287)	total: 12.2s	remaining: 30.1s
288:	learn: 0.1398174	test: 0.1395787	best: 0.1395742 (287)	total: 12.2s	remaining: 30.1s
289:	learn: 0.1398088	test: 0.1395880	best: 0.1395742 (287)	total: 12.3s	remaining: 30s
290:	learn: 0.1397109	test: 0.1395036	best: 0.1395036 (290)	total: 12.3s	remaining: 30s
291:	learn: 0.1396792	test: 0.1395026	best: 0.1395026 (291)	total: 12.3s	remaining: 29.9s
292:	learn: 0.1396765	test: 0.1395023	best: 0.1395023 (292)	total: 12.4s	remaining: 29.9s
293:	learn: 0.1396745	test: 0.1395056	best: 0.1395023 (292)	total: 12.4s	remaining: 29.8s
294:	learn: 0.1396725	test: 0.1395096	best: 0.1395023 (292)	total: 12.5s	remaining: 29.8s
295:	learn: 0.1396059	test: 0.1395229	best: 0.1395023 (292)	total: 12.5s	remaining: 29.8s
296:	learn: 0.1395744	test: 0.1395162	best: 0.1395023 (292)	total: 12.6s	remaining: 29.7s
297:	learn: 0.1395723	test: 0.1395161	best: 0.1395023 (292)	total: 12.6s	remaining: 29.7s
298:	learn: 0.1395703	test: 0.1395160	best: 0.1395023 (292)	total: 12.6s	remaining: 29.7s
299:	learn: 0.1395494	test: 0.1395288	best: 0.1395023 (292)	total: 12.7s	remaining: 29.6s
300:	learn: 0.1394954	test: 0.1395072	best: 0.1395023 (292)	total: 12.7s	remaining: 29.6s
301:	learn: 0.1394497	test: 0.1394776	best: 0.1394776 (301)	total: 12.8s	remaining: 29.5s
302:	learn: 0.1394065	test: 0.1394613	best: 0.1394613 (302)	total: 12.8s	remaining: 29.5s
303:	learn: 0.1394042	test: 0.1394625	best: 0.1394613 (302)	total: 12.8s	remaining: 29.4s
304:	learn: 0.1394041	test: 0.1394625	best: 0.1394613 (302)	total: 12.9s	remaining: 29.4s
305:	learn: 0.1393769	test: 0.1394564	best: 0.1394564 (305)	total: 12.9s	remaining: 29.3s
306:	learn: 0.1393705	test: 0.1394610	best: 0.1394564 (305)	total: 13s	remaining: 29.3s
307:	learn: 0.1393405	test: 0.1394325	best: 0.1394325 (307)	total: 13s	remaining: 29.2s
308:	learn: 0.1393366	test: 0.1394324	best: 0.1394324 (308)	total: 13s	remaining: 29.2s
309:	learn: 0.1392854	test: 0.1394275	best: 0.1394275 (309)	total: 13.1s	remaining: 29.1s
310:	learn: 0.1392851	test: 0.1394269	best: 0.1394269 (310)	total: 13.1s	remaining: 29.1s
311:	learn: 0.1392848	test: 0.1394264	best: 0.1394264 (311)	total: 13.2s	remaining: 29s
312:	learn: 0.1392683	test: 0.1394288	best: 0.1394264 (311)	total: 13.2s	remaining: 29s
313:	learn: 0.1390529	test: 0.1392579	best: 0.1392579 (313)	total: 13.2s	remaining: 28.9s
314:	learn: 0.1390479	test: 0.1392604	best: 0.1392579 (313)	total: 13.3s	remaining: 28.9s
315:	learn: 0.1388930	test: 0.1391180	best: 0.1391180 (315)	total: 13.3s	remaining: 28.8s
316:	learn: 0.1388915	test: 0.1391175	best: 0.1391175 (316)	total: 13.4s	remaining: 28.8s
317:	learn: 0.1388882	test: 0.1391160	best: 0.1391160 (317)	total: 13.4s	remaining: 28.7s
318:	learn: 0.1388647	test: 0.1390969	best: 0.1390969 (318)	total: 13.4s	remaining: 28.7s
319:	learn: 0.1388395	test: 0.1391111	best: 0.1390969 (318)	total: 13.5s	remaining: 28.6s
320:	learn: 0.1387523	test: 0.1390670	best: 0.1390670 (320)	total: 13.5s	remaining: 28.6s
321:	learn: 0.1387244	test: 0.1390352	best: 0.1390352 (321)	total: 13.6s	remaining: 28.6s
322:	learn: 0.1387183	test: 0.1390327	best: 0.1390327 (322)	total: 13.6s	remaining: 28.5s
323:	learn: 0.1386713	test: 0.1390079	best: 0.1390079 (323)	total: 13.7s	remaining: 28.5s
324:	learn: 0.1386697	test: 0.1390066	best: 0.1390066 (324)	total: 13.7s	remaining: 28.5s
325:	learn: 0.1386691	test: 0.1390081	best: 0.1390066 (324)	total: 13.7s	remaining: 28.4s
326:	learn: 0.1386426	test: 0.1389995	best: 0.1389995 (326)	total: 13.8s	remaining: 28.4s
327:	learn: 0.1386367	test: 0.1390007	best: 0.1389995 (326)	total: 13.8s	remaining: 28.3s
328:	learn: 0.1386177	test: 0.1389999	best: 0.1389995 (326)	total: 13.9s	remaining: 28.3s
329:	learn: 0.1385900	test: 0.1389959	best: 0.1389959 (329)	total: 13.9s	remaining: 28.2s
330:	learn: 0.1385807	test: 0.1389975	best: 0.1389959 (329)	total: 13.9s	remaining: 28.2s
331:	learn: 0.1385742	test: 0.1390031	best: 0.1389959 (329)	total: 14s	remaining: 28.1s
332:	learn: 0.1385738	test: 0.1390031	best: 0.1389959 (329)	total: 14s	remaining: 28.1s
333:	learn: 0.1385510	test: 0.1389992	best: 0.1389959 (329)	total: 14.1s	remaining: 28.1s
334:	learn: 0.1385374	test: 0.1390005	best: 0.1389959 (329)	total: 14.1s	remaining: 28s
335:	learn: 0.1385313	test: 0.1390010	best: 0.1389959 (329)	total: 14.1s	remaining: 28s
336:	learn: 0.1385286	test: 0.1389969	best: 0.1389959 (329)	total: 14.2s	remaining: 27.9s
337:	learn: 0.1384616	test: 0.1389892	best: 0.1389892 (337)	total: 14.2s	remaining: 27.9s
338:	learn: 0.1384571	test: 0.1389905	best: 0.1389892 (337)	total: 14.3s	remaining: 27.8s
339:	learn: 0.1384567	test: 0.1389906	best: 0.1389892 (337)	total: 14.3s	remaining: 27.8s
340:	learn: 0.1384477	test: 0.1389921	best: 0.1389892 (337)	total: 14.4s	remaining: 27.8s
341:	learn: 0.1384370	test: 0.1389870	best: 0.1389870 (341)	total: 14.4s	remaining: 27.7s
342:	learn: 0.1384274	test: 0.1389804	best: 0.1389804 (342)	total: 14.4s	remaining: 27.7s
343:	learn: 0.1384090	test: 0.1389789	best: 0.1389789 (343)	total: 14.5s	remaining: 27.6s
344:	learn: 0.1383903	test: 0.1389795	best: 0.1389789 (343)	total: 14.5s	remaining: 27.6s
345:	learn: 0.1383036	test: 0.1389245	best: 0.1389245 (345)	total: 14.6s	remaining: 27.6s
346:	learn: 0.1383015	test: 0.1389237	best: 0.1389237 (346)	total: 14.6s	remaining: 27.5s
347:	learn: 0.1382476	test: 0.1389255	best: 0.1389237 (346)	total: 14.7s	remaining: 27.5s
348:	learn: 0.1382173	test: 0.1388901	best: 0.1388901 (348)	total: 14.7s	remaining: 27.5s
349:	learn: 0.1382006	test: 0.1388764	best: 0.1388764 (349)	total: 14.8s	remaining: 27.4s
350:	learn: 0.1381904	test: 0.1388805	best: 0.1388764 (349)	total: 14.8s	remaining: 27.4s
351:	learn: 0.1381455	test: 0.1388774	best: 0.1388764 (349)	total: 14.8s	remaining: 27.3s
352:	learn: 0.1381452	test: 0.1388768	best: 0.1388764 (349)	total: 14.9s	remaining: 27.3s
353:	learn: 0.1380593	test: 0.1388127	best: 0.1388127 (353)	total: 14.9s	remaining: 27.2s
354:	learn: 0.1379583	test: 0.1387721	best: 0.1387721 (354)	total: 15s	remaining: 27.2s
355:	learn: 0.1379482	test: 0.1387780	best: 0.1387721 (354)	total: 15s	remaining: 27.1s
356:	learn: 0.1379471	test: 0.1387779	best: 0.1387721 (354)	total: 15s	remaining: 27.1s
357:	learn: 0.1379085	test: 0.1387767	best: 0.1387721 (354)	total: 15.1s	remaining: 27.1s
358:	learn: 0.1378933	test: 0.1387653	best: 0.1387653 (358)	total: 15.1s	remaining: 27s
359:	learn: 0.1378919	test: 0.1387645	best: 0.1387645 (359)	total: 15.2s	remaining: 27s
360:	learn: 0.1378797	test: 0.1387659	best: 0.1387645 (359)	total: 15.2s	remaining: 26.9s
361:	learn: 0.1378687	test: 0.1387575	best: 0.1387575 (361)	total: 15.2s	remaining: 26.9s
362:	learn: 0.1378543	test: 0.1387495	best: 0.1387495 (362)	total: 15.3s	remaining: 26.8s
363:	learn: 0.1378229	test: 0.1387197	best: 0.1387197 (363)	total: 15.3s	remaining: 26.8s
364:	learn: 0.1378115	test: 0.1387223	best: 0.1387197 (363)	total: 15.4s	remaining: 26.7s
365:	learn: 0.1377970	test: 0.1387149	best: 0.1387149 (365)	total: 15.4s	remaining: 26.8s
366:	learn: 0.1377970	test: 0.1387151	best: 0.1387149 (365)	total: 15.5s	remaining: 26.7s
367:	learn: 0.1377638	test: 0.1387041	best: 0.1387041 (367)	total: 15.5s	remaining: 26.7s
368:	learn: 0.1377576	test: 0.1387011	best: 0.1387011 (368)	total: 15.6s	remaining: 26.7s
369:	learn: 0.1377566	test: 0.1386986	best: 0.1386986 (369)	total: 15.6s	remaining: 26.6s
370:	learn: 0.1377498	test: 0.1387000	best: 0.1386986 (369)	total: 15.7s	remaining: 26.6s
371:	learn: 0.1376547	test: 0.1386284	best: 0.1386284 (371)	total: 15.7s	remaining: 26.6s
372:	learn: 0.1376524	test: 0.1386335	best: 0.1386284 (371)	total: 15.8s	remaining: 26.5s
373:	learn: 0.1376306	test: 0.1386425	best: 0.1386284 (371)	total: 15.8s	remaining: 26.5s
374:	learn: 0.1376199	test: 0.1386443	best: 0.1386284 (371)	total: 15.8s	remaining: 26.4s
375:	learn: 0.1376187	test: 0.1386444	best: 0.1386284 (371)	total: 15.9s	remaining: 26.4s
376:	learn: 0.1376152	test: 0.1386437	best: 0.1386284 (371)	total: 15.9s	remaining: 26.3s
377:	learn: 0.1376146	test: 0.1386435	best: 0.1386284 (371)	total: 16s	remaining: 26.3s
378:	learn: 0.1376027	test: 0.1386393	best: 0.1386284 (371)	total: 16s	remaining: 26.2s
379:	learn: 0.1376019	test: 0.1386390	best: 0.1386284 (371)	total: 16s	remaining: 26.2s
380:	learn: 0.1376019	test: 0.1386390	best: 0.1386284 (371)	total: 16.1s	remaining: 26.1s
381:	learn: 0.1375692	test: 0.1386442	best: 0.1386284 (371)	total: 16.1s	remaining: 26.1s
382:	learn: 0.1375540	test: 0.1386489	best: 0.1386284 (371)	total: 16.2s	remaining: 26s
383:	learn: 0.1374847	test: 0.1386050	best: 0.1386050 (383)	total: 16.2s	remaining: 26s
384:	learn: 0.1374843	test: 0.1386050	best: 0.1386050 (384)	total: 16.2s	remaining: 25.9s
385:	learn: 0.1373558	test: 0.1385034	best: 0.1385034 (385)	total: 16.3s	remaining: 25.9s
386:	learn: 0.1373205	test: 0.1385051	best: 0.1385034 (385)	total: 16.3s	remaining: 25.9s
387:	learn: 0.1372927	test: 0.1385097	best: 0.1385034 (385)	total: 16.4s	remaining: 25.8s
388:	learn: 0.1372610	test: 0.1385098	best: 0.1385034 (385)	total: 16.4s	remaining: 25.8s
389:	learn: 0.1372501	test: 0.1385044	best: 0.1385034 (385)	total: 16.4s	remaining: 25.7s
390:	learn: 0.1372491	test: 0.1385035	best: 0.1385034 (385)	total: 16.5s	remaining: 25.7s
391:	learn: 0.1372040	test: 0.1384574	best: 0.1384574 (391)	total: 16.5s	remaining: 25.6s
392:	learn: 0.1371686	test: 0.1384532	best: 0.1384532 (392)	total: 16.6s	remaining: 25.6s
393:	learn: 0.1371553	test: 0.1384461	best: 0.1384461 (393)	total: 16.6s	remaining: 25.6s
394:	learn: 0.1371517	test: 0.1384476	best: 0.1384461 (393)	total: 16.7s	remaining: 25.6s
395:	learn: 0.1371489	test: 0.1384495	best: 0.1384461 (393)	total: 16.7s	remaining: 25.5s
396:	learn: 0.1370897	test: 0.1384366	best: 0.1384366 (396)	total: 16.8s	remaining: 25.5s
397:	learn: 0.1370855	test: 0.1384356	best: 0.1384356 (397)	total: 16.8s	remaining: 25.4s
398:	learn: 0.1370763	test: 0.1384384	best: 0.1384356 (397)	total: 16.8s	remaining: 25.4s
399:	learn: 0.1370548	test: 0.1384342	best: 0.1384342 (399)	total: 16.9s	remaining: 25.3s
400:	learn: 0.1370534	test: 0.1384351	best: 0.1384342 (399)	total: 16.9s	remaining: 25.3s
401:	learn: 0.1370428	test: 0.1384304	best: 0.1384304 (401)	total: 17s	remaining: 25.2s
402:	learn: 0.1370277	test: 0.1384177	best: 0.1384177 (402)	total: 17s	remaining: 25.2s
403:	learn: 0.1370205	test: 0.1384091	best: 0.1384091 (403)	total: 17s	remaining: 25.1s
404:	learn: 0.1370054	test: 0.1384242	best: 0.1384091 (403)	total: 17.1s	remaining: 25.1s
405:	learn: 0.1369940	test: 0.1384339	best: 0.1384091 (403)	total: 17.1s	remaining: 25.1s
406:	learn: 0.1369701	test: 0.1384265	best: 0.1384091 (403)	total: 17.2s	remaining: 25s
407:	learn: 0.1369601	test: 0.1384225	best: 0.1384091 (403)	total: 17.2s	remaining: 25s
408:	learn: 0.1368419	test: 0.1383123	best: 0.1383123 (408)	total: 17.2s	remaining: 24.9s
409:	learn: 0.1368280	test: 0.1383123	best: 0.1383123 (409)	total: 17.3s	remaining: 24.9s
410:	learn: 0.1368141	test: 0.1383137	best: 0.1383123 (409)	total: 17.3s	remaining: 24.8s
411:	learn: 0.1367623	test: 0.1383025	best: 0.1383025 (411)	total: 17.4s	remaining: 24.8s
412:	learn: 0.1367494	test: 0.1382979	best: 0.1382979 (412)	total: 17.4s	remaining: 24.7s
413:	learn: 0.1367429	test: 0.1382986	best: 0.1382979 (412)	total: 17.4s	remaining: 24.7s
414:	learn: 0.1367390	test: 0.1383011	best: 0.1382979 (412)	total: 17.5s	remaining: 24.6s
415:	learn: 0.1367336	test: 0.1382992	best: 0.1382979 (412)	total: 17.5s	remaining: 24.6s
416:	learn: 0.1367075	test: 0.1382857	best: 0.1382857 (416)	total: 17.6s	remaining: 24.6s
417:	learn: 0.1366982	test: 0.1382942	best: 0.1382857 (416)	total: 17.6s	remaining: 24.5s
418:	learn: 0.1366585	test: 0.1382750	best: 0.1382750 (418)	total: 17.7s	remaining: 24.5s
419:	learn: 0.1366453	test: 0.1382676	best: 0.1382676 (419)	total: 17.7s	remaining: 24.5s
420:	learn: 0.1366401	test: 0.1382634	best: 0.1382634 (420)	total: 17.8s	remaining: 24.4s
421:	learn: 0.1366391	test: 0.1382636	best: 0.1382634 (420)	total: 17.8s	remaining: 24.4s
422:	learn: 0.1366329	test: 0.1382577	best: 0.1382577 (422)	total: 17.8s	remaining: 24.3s
423:	learn: 0.1366174	test: 0.1382753	best: 0.1382577 (422)	total: 17.9s	remaining: 24.3s
424:	learn: 0.1366130	test: 0.1382702	best: 0.1382577 (422)	total: 17.9s	remaining: 24.2s
425:	learn: 0.1365896	test: 0.1382628	best: 0.1382577 (422)	total: 18s	remaining: 24.2s
426:	learn: 0.1365886	test: 0.1382625	best: 0.1382577 (422)	total: 18s	remaining: 24.2s
427:	learn: 0.1365851	test: 0.1382687	best: 0.1382577 (422)	total: 18s	remaining: 24.1s
428:	learn: 0.1365804	test: 0.1382741	best: 0.1382577 (422)	total: 18.1s	remaining: 24.1s
429:	learn: 0.1365342	test: 0.1382460	best: 0.1382460 (429)	total: 18.1s	remaining: 24s
430:	learn: 0.1365313	test: 0.1382515	best: 0.1382460 (429)	total: 18.2s	remaining: 24s
431:	learn: 0.1365280	test: 0.1382487	best: 0.1382460 (429)	total: 18.2s	remaining: 23.9s
432:	learn: 0.1365150	test: 0.1382358	best: 0.1382358 (432)	total: 18.2s	remaining: 23.9s
433:	learn: 0.1364991	test: 0.1382323	best: 0.1382323 (433)	total: 18.3s	remaining: 23.8s
434:	learn: 0.1364932	test: 0.1382360	best: 0.1382323 (433)	total: 18.3s	remaining: 23.8s
435:	learn: 0.1364888	test: 0.1382341	best: 0.1382323 (433)	total: 18.4s	remaining: 23.8s
436:	learn: 0.1364865	test: 0.1382397	best: 0.1382323 (433)	total: 18.4s	remaining: 23.7s
437:	learn: 0.1364852	test: 0.1382374	best: 0.1382323 (433)	total: 18.4s	remaining: 23.7s
438:	learn: 0.1364616	test: 0.1382381	best: 0.1382323 (433)	total: 18.5s	remaining: 23.6s
439:	learn: 0.1364497	test: 0.1382293	best: 0.1382293 (439)	total: 18.5s	remaining: 23.6s
440:	learn: 0.1364074	test: 0.1382072	best: 0.1382072 (440)	total: 18.6s	remaining: 23.5s
441:	learn: 0.1364055	test: 0.1382076	best: 0.1382072 (440)	total: 18.6s	remaining: 23.5s
442:	learn: 0.1364041	test: 0.1382112	best: 0.1382072 (440)	total: 18.6s	remaining: 23.4s
443:	learn: 0.1363833	test: 0.1381993	best: 0.1381993 (443)	total: 18.7s	remaining: 23.5s
444:	learn: 0.1363775	test: 0.1381978	best: 0.1381978 (444)	total: 18.8s	remaining: 23.4s
445:	learn: 0.1363728	test: 0.1382009	best: 0.1381978 (444)	total: 18.8s	remaining: 23.4s
446:	learn: 0.1363699	test: 0.1382009	best: 0.1381978 (444)	total: 18.9s	remaining: 23.3s
447:	learn: 0.1363517	test: 0.1382017	best: 0.1381978 (444)	total: 18.9s	remaining: 23.3s
448:	learn: 0.1363325	test: 0.1381963	best: 0.1381963 (448)	total: 18.9s	remaining: 23.2s
449:	learn: 0.1362984	test: 0.1382062	best: 0.1381963 (448)	total: 19s	remaining: 23.2s
450:	learn: 0.1362469	test: 0.1381523	best: 0.1381523 (450)	total: 19s	remaining: 23.2s
451:	learn: 0.1362365	test: 0.1381456	best: 0.1381456 (451)	total: 19.1s	remaining: 23.1s
452:	learn: 0.1362336	test: 0.1381515	best: 0.1381456 (451)	total: 19.1s	remaining: 23.1s
453:	learn: 0.1362330	test: 0.1381516	best: 0.1381456 (451)	total: 19.2s	remaining: 23s
454:	learn: 0.1361653	test: 0.1381094	best: 0.1381094 (454)	total: 19.2s	remaining: 23s
455:	learn: 0.1361645	test: 0.1381095	best: 0.1381094 (454)	total: 19.2s	remaining: 23s
456:	learn: 0.1361550	test: 0.1381109	best: 0.1381094 (454)	total: 19.3s	remaining: 22.9s
457:	learn: 0.1361549	test: 0.1381107	best: 0.1381094 (454)	total: 19.3s	remaining: 22.9s
458:	learn: 0.1361432	test: 0.1381007	best: 0.1381007 (458)	total: 19.4s	remaining: 22.8s
459:	learn: 0.1361265	test: 0.1380831	best: 0.1380831 (459)	total: 19.4s	remaining: 22.8s
460:	learn: 0.1361028	test: 0.1380841	best: 0.1380831 (459)	total: 19.4s	remaining: 22.7s
461:	learn: 0.1361028	test: 0.1380841	best: 0.1380831 (459)	total: 19.5s	remaining: 22.7s
462:	learn: 0.1360670	test: 0.1380342	best: 0.1380342 (462)	total: 19.5s	remaining: 22.6s
463:	learn: 0.1360014	test: 0.1379803	best: 0.1379803 (463)	total: 19.6s	remaining: 22.6s
464:	learn: 0.1359624	test: 0.1379744	best: 0.1379744 (464)	total: 19.6s	remaining: 22.5s
465:	learn: 0.1359614	test: 0.1379748	best: 0.1379744 (464)	total: 19.6s	remaining: 22.5s
466:	learn: 0.1359177	test: 0.1379667	best: 0.1379667 (466)	total: 19.7s	remaining: 22.5s
467:	learn: 0.1358596	test: 0.1379287	best: 0.1379287 (467)	total: 19.7s	remaining: 22.4s
468:	learn: 0.1358514	test: 0.1379349	best: 0.1379287 (467)	total: 19.8s	remaining: 22.4s
469:	learn: 0.1358361	test: 0.1379274	best: 0.1379274 (469)	total: 19.8s	remaining: 22.4s
470:	learn: 0.1358262	test: 0.1379259	best: 0.1379259 (470)	total: 19.9s	remaining: 22.3s
471:	learn: 0.1357650	test: 0.1378854	best: 0.1378854 (471)	total: 19.9s	remaining: 22.3s
472:	learn: 0.1357551	test: 0.1378884	best: 0.1378854 (471)	total: 19.9s	remaining: 22.2s
473:	learn: 0.1357540	test: 0.1378891	best: 0.1378854 (471)	total: 20s	remaining: 22.2s
474:	learn: 0.1357289	test: 0.1378800	best: 0.1378800 (474)	total: 20s	remaining: 22.1s
475:	learn: 0.1356994	test: 0.1378876	best: 0.1378800 (474)	total: 20.1s	remaining: 22.1s
476:	learn: 0.1356596	test: 0.1378663	best: 0.1378663 (476)	total: 20.1s	remaining: 22.1s
477:	learn: 0.1356544	test: 0.1378701	best: 0.1378663 (476)	total: 20.2s	remaining: 22s
478:	learn: 0.1356504	test: 0.1378744	best: 0.1378663 (476)	total: 20.2s	remaining: 22s
479:	learn: 0.1356477	test: 0.1378666	best: 0.1378663 (476)	total: 20.2s	remaining: 21.9s
480:	learn: 0.1356472	test: 0.1378671	best: 0.1378663 (476)	total: 20.3s	remaining: 21.9s
481:	learn: 0.1356081	test: 0.1378056	best: 0.1378056 (481)	total: 20.3s	remaining: 21.8s
482:	learn: 0.1355628	test: 0.1377885	best: 0.1377885 (482)	total: 20.4s	remaining: 21.8s
483:	learn: 0.1355622	test: 0.1377887	best: 0.1377885 (482)	total: 20.4s	remaining: 21.7s
484:	learn: 0.1355317	test: 0.1377758	best: 0.1377758 (484)	total: 20.4s	remaining: 21.7s
485:	learn: 0.1354150	test: 0.1377105	best: 0.1377105 (485)	total: 20.5s	remaining: 21.6s
486:	learn: 0.1354140	test: 0.1377114	best: 0.1377105 (485)	total: 20.5s	remaining: 21.6s
487:	learn: 0.1353918	test: 0.1377245	best: 0.1377105 (485)	total: 20.6s	remaining: 21.6s
488:	learn: 0.1353892	test: 0.1377239	best: 0.1377105 (485)	total: 20.6s	remaining: 21.5s
489:	learn: 0.1353250	test: 0.1376680	best: 0.1376680 (489)	total: 20.6s	remaining: 21.5s
490:	learn: 0.1353237	test: 0.1376689	best: 0.1376680 (489)	total: 20.7s	remaining: 21.4s
491:	learn: 0.1353235	test: 0.1376689	best: 0.1376680 (489)	total: 20.7s	remaining: 21.4s
492:	learn: 0.1353232	test: 0.1376712	best: 0.1376680 (489)	total: 20.8s	remaining: 21.3s
493:	learn: 0.1353155	test: 0.1376800	best: 0.1376680 (489)	total: 20.8s	remaining: 21.3s
494:	learn: 0.1352964	test: 0.1376775	best: 0.1376680 (489)	total: 20.9s	remaining: 21.3s
495:	learn: 0.1352650	test: 0.1376674	best: 0.1376674 (495)	total: 20.9s	remaining: 21.3s
496:	learn: 0.1352647	test: 0.1376674	best: 0.1376674 (496)	total: 21s	remaining: 21.2s
497:	learn: 0.1352631	test: 0.1376644	best: 0.1376644 (497)	total: 21s	remaining: 21.2s
498:	learn: 0.1352011	test: 0.1376129	best: 0.1376129 (498)	total: 21s	remaining: 21.1s
499:	learn: 0.1351650	test: 0.1376564	best: 0.1376129 (498)	total: 21.1s	remaining: 21.1s
500:	learn: 0.1350999	test: 0.1376359	best: 0.1376129 (498)	total: 21.1s	remaining: 21s
501:	learn: 0.1350749	test: 0.1376517	best: 0.1376129 (498)	total: 21.2s	remaining: 21s
502:	learn: 0.1350301	test: 0.1376244	best: 0.1376129 (498)	total: 21.2s	remaining: 20.9s
503:	learn: 0.1350259	test: 0.1376253	best: 0.1376129 (498)	total: 21.2s	remaining: 20.9s
504:	learn: 0.1350011	test: 0.1376148	best: 0.1376129 (498)	total: 21.3s	remaining: 20.9s
505:	learn: 0.1349946	test: 0.1376200	best: 0.1376129 (498)	total: 21.3s	remaining: 20.8s
506:	learn: 0.1349946	test: 0.1376200	best: 0.1376129 (498)	total: 21.4s	remaining: 20.8s
507:	learn: 0.1349941	test: 0.1376211	best: 0.1376129 (498)	total: 21.4s	remaining: 20.7s
508:	learn: 0.1349939	test: 0.1376219	best: 0.1376129 (498)	total: 21.4s	remaining: 20.7s
509:	learn: 0.1349779	test: 0.1376221	best: 0.1376129 (498)	total: 21.5s	remaining: 20.6s
510:	learn: 0.1349776	test: 0.1376239	best: 0.1376129 (498)	total: 21.5s	remaining: 20.6s
511:	learn: 0.1349766	test: 0.1376234	best: 0.1376129 (498)	total: 21.6s	remaining: 20.5s
512:	learn: 0.1349680	test: 0.1376204	best: 0.1376129 (498)	total: 21.6s	remaining: 20.5s
513:	learn: 0.1349570	test: 0.1376106	best: 0.1376106 (513)	total: 21.6s	remaining: 20.5s
514:	learn: 0.1349534	test: 0.1376077	best: 0.1376077 (514)	total: 21.7s	remaining: 20.4s
515:	learn: 0.1348381	test: 0.1374591	best: 0.1374591 (515)	total: 21.7s	remaining: 20.4s
516:	learn: 0.1348151	test: 0.1374589	best: 0.1374589 (516)	total: 21.8s	remaining: 20.3s
517:	learn: 0.1347584	test: 0.1374337	best: 0.1374337 (517)	total: 21.8s	remaining: 20.3s
518:	learn: 0.1347513	test: 0.1374199	best: 0.1374199 (518)	total: 21.9s	remaining: 20.3s
519:	learn: 0.1347441	test: 0.1374251	best: 0.1374199 (518)	total: 21.9s	remaining: 20.2s
520:	learn: 0.1347368	test: 0.1374184	best: 0.1374184 (520)	total: 22s	remaining: 20.2s
521:	learn: 0.1347341	test: 0.1374193	best: 0.1374184 (520)	total: 22s	remaining: 20.1s
522:	learn: 0.1346959	test: 0.1374176	best: 0.1374176 (522)	total: 22s	remaining: 20.1s
523:	learn: 0.1346959	test: 0.1374175	best: 0.1374175 (523)	total: 22.1s	remaining: 20.1s
524:	learn: 0.1346569	test: 0.1374430	best: 0.1374175 (523)	total: 22.1s	remaining: 20s
525:	learn: 0.1346418	test: 0.1374545	best: 0.1374175 (523)	total: 22.2s	remaining: 20s
526:	learn: 0.1346386	test: 0.1374582	best: 0.1374175 (523)	total: 22.2s	remaining: 19.9s
527:	learn: 0.1345496	test: 0.1374130	best: 0.1374130 (527)	total: 22.2s	remaining: 19.9s
528:	learn: 0.1345435	test: 0.1374103	best: 0.1374103 (528)	total: 22.3s	remaining: 19.8s
529:	learn: 0.1345429	test: 0.1374085	best: 0.1374085 (529)	total: 22.3s	remaining: 19.8s
530:	learn: 0.1345332	test: 0.1374062	best: 0.1374062 (530)	total: 22.4s	remaining: 19.7s
531:	learn: 0.1345264	test: 0.1374092	best: 0.1374062 (530)	total: 22.4s	remaining: 19.7s
532:	learn: 0.1345224	test: 0.1374104	best: 0.1374062 (530)	total: 22.4s	remaining: 19.7s
533:	learn: 0.1345113	test: 0.1374078	best: 0.1374062 (530)	total: 22.5s	remaining: 19.6s
534:	learn: 0.1345033	test: 0.1374028	best: 0.1374028 (534)	total: 22.5s	remaining: 19.6s
535:	learn: 0.1344987	test: 0.1374106	best: 0.1374028 (534)	total: 22.6s	remaining: 19.5s
536:	learn: 0.1344980	test: 0.1374121	best: 0.1374028 (534)	total: 22.6s	remaining: 19.5s
537:	learn: 0.1344390	test: 0.1373995	best: 0.1373995 (537)	total: 22.6s	remaining: 19.4s
538:	learn: 0.1344332	test: 0.1373946	best: 0.1373946 (538)	total: 22.7s	remaining: 19.4s
539:	learn: 0.1344235	test: 0.1373913	best: 0.1373913 (539)	total: 22.7s	remaining: 19.4s
540:	learn: 0.1344063	test: 0.1373959	best: 0.1373913 (539)	total: 22.8s	remaining: 19.3s
541:	learn: 0.1343972	test: 0.1373897	best: 0.1373897 (541)	total: 22.8s	remaining: 19.3s
542:	learn: 0.1343934	test: 0.1374019	best: 0.1373897 (541)	total: 22.9s	remaining: 19.2s
543:	learn: 0.1343933	test: 0.1374020	best: 0.1373897 (541)	total: 22.9s	remaining: 19.2s
544:	learn: 0.1343822	test: 0.1373896	best: 0.1373896 (544)	total: 23s	remaining: 19.2s
545:	learn: 0.1343780	test: 0.1373955	best: 0.1373896 (544)	total: 23s	remaining: 19.1s
546:	learn: 0.1343754	test: 0.1373982	best: 0.1373896 (544)	total: 23s	remaining: 19.1s
547:	learn: 0.1343638	test: 0.1374006	best: 0.1373896 (544)	total: 23.1s	remaining: 19s
548:	learn: 0.1342788	test: 0.1373207	best: 0.1373207 (548)	total: 23.1s	remaining: 19s
549:	learn: 0.1342685	test: 0.1373203	best: 0.1373203 (549)	total: 23.2s	remaining: 18.9s
550:	learn: 0.1342652	test: 0.1373262	best: 0.1373203 (549)	total: 23.2s	remaining: 18.9s
551:	learn: 0.1342499	test: 0.1373036	best: 0.1373036 (551)	total: 23.2s	remaining: 18.9s
552:	learn: 0.1342349	test: 0.1372990	best: 0.1372990 (552)	total: 23.3s	remaining: 18.8s
553:	learn: 0.1342247	test: 0.1372953	best: 0.1372953 (553)	total: 23.3s	remaining: 18.8s
554:	learn: 0.1342237	test: 0.1372973	best: 0.1372953 (553)	total: 23.4s	remaining: 18.7s
555:	learn: 0.1342182	test: 0.1373016	best: 0.1372953 (553)	total: 23.4s	remaining: 18.7s
556:	learn: 0.1341878	test: 0.1372998	best: 0.1372953 (553)	total: 23.4s	remaining: 18.6s
557:	learn: 0.1341848	test: 0.1373066	best: 0.1372953 (553)	total: 23.5s	remaining: 18.6s
558:	learn: 0.1341846	test: 0.1373071	best: 0.1372953 (553)	total: 23.5s	remaining: 18.6s
559:	learn: 0.1341737	test: 0.1372910	best: 0.1372910 (559)	total: 23.6s	remaining: 18.5s
560:	learn: 0.1341725	test: 0.1372881	best: 0.1372881 (560)	total: 23.6s	remaining: 18.5s
561:	learn: 0.1341706	test: 0.1372898	best: 0.1372881 (560)	total: 23.6s	remaining: 18.4s
562:	learn: 0.1341646	test: 0.1372885	best: 0.1372881 (560)	total: 23.7s	remaining: 18.4s
563:	learn: 0.1341288	test: 0.1372628	best: 0.1372628 (563)	total: 23.7s	remaining: 18.3s
564:	learn: 0.1341283	test: 0.1372640	best: 0.1372628 (563)	total: 23.7s	remaining: 18.3s
565:	learn: 0.1341015	test: 0.1372344	best: 0.1372344 (565)	total: 23.8s	remaining: 18.2s
566:	learn: 0.1341003	test: 0.1372324	best: 0.1372324 (566)	total: 23.8s	remaining: 18.2s
567:	learn: 0.1340938	test: 0.1372268	best: 0.1372268 (567)	total: 23.9s	remaining: 18.2s
568:	learn: 0.1340621	test: 0.1371971	best: 0.1371971 (568)	total: 23.9s	remaining: 18.1s
569:	learn: 0.1340407	test: 0.1371978	best: 0.1371971 (568)	total: 24s	remaining: 18.1s
570:	learn: 0.1340252	test: 0.1372012	best: 0.1371971 (568)	total: 24s	remaining: 18s
571:	learn: 0.1340191	test: 0.1371980	best: 0.1371971 (568)	total: 24.1s	remaining: 18s
572:	learn: 0.1340055	test: 0.1371891	best: 0.1371891 (572)	total: 24.1s	remaining: 18s
573:	learn: 0.1338926	test: 0.1370731	best: 0.1370731 (573)	total: 24.1s	remaining: 17.9s
574:	learn: 0.1338758	test: 0.1370709	best: 0.1370709 (574)	total: 24.2s	remaining: 17.9s
575:	learn: 0.1338599	test: 0.1370793	best: 0.1370709 (574)	total: 24.2s	remaining: 17.8s
576:	learn: 0.1338372	test: 0.1370741	best: 0.1370709 (574)	total: 24.3s	remaining: 17.8s
577:	learn: 0.1338369	test: 0.1370726	best: 0.1370709 (574)	total: 24.3s	remaining: 17.7s
578:	learn: 0.1338274	test: 0.1370795	best: 0.1370709 (574)	total: 24.3s	remaining: 17.7s
579:	learn: 0.1338123	test: 0.1370870	best: 0.1370709 (574)	total: 24.4s	remaining: 17.7s
580:	learn: 0.1338103	test: 0.1370912	best: 0.1370709 (574)	total: 24.4s	remaining: 17.6s
581:	learn: 0.1338089	test: 0.1370891	best: 0.1370709 (574)	total: 24.5s	remaining: 17.6s
582:	learn: 0.1338060	test: 0.1370963	best: 0.1370709 (574)	total: 24.5s	remaining: 17.5s
583:	learn: 0.1338048	test: 0.1370960	best: 0.1370709 (574)	total: 24.5s	remaining: 17.5s
584:	learn: 0.1338045	test: 0.1370959	best: 0.1370709 (574)	total: 24.6s	remaining: 17.4s
585:	learn: 0.1337856	test: 0.1370887	best: 0.1370709 (574)	total: 24.6s	remaining: 17.4s
586:	learn: 0.1337847	test: 0.1370869	best: 0.1370709 (574)	total: 24.7s	remaining: 17.3s
587:	learn: 0.1337816	test: 0.1370905	best: 0.1370709 (574)	total: 24.7s	remaining: 17.3s
588:	learn: 0.1337616	test: 0.1370934	best: 0.1370709 (574)	total: 24.7s	remaining: 17.3s
589:	learn: 0.1337609	test: 0.1370939	best: 0.1370709 (574)	total: 24.8s	remaining: 17.2s
590:	learn: 0.1337399	test: 0.1370964	best: 0.1370709 (574)	total: 24.8s	remaining: 17.2s
591:	learn: 0.1337257	test: 0.1370921	best: 0.1370709 (574)	total: 24.9s	remaining: 17.1s
592:	learn: 0.1337184	test: 0.1370962	best: 0.1370709 (574)	total: 24.9s	remaining: 17.1s
593:	learn: 0.1337062	test: 0.1370917	best: 0.1370709 (574)	total: 24.9s	remaining: 17s
594:	learn: 0.1337057	test: 0.1370903	best: 0.1370709 (574)	total: 25s	remaining: 17s
595:	learn: 0.1337050	test: 0.1370896	best: 0.1370709 (574)	total: 25.1s	remaining: 17s
596:	learn: 0.1337023	test: 0.1370876	best: 0.1370709 (574)	total: 25.1s	remaining: 17s
597:	learn: 0.1336963	test: 0.1370746	best: 0.1370709 (574)	total: 25.2s	remaining: 16.9s
598:	learn: 0.1336922	test: 0.1370710	best: 0.1370709 (574)	total: 25.2s	remaining: 16.9s
599:	learn: 0.1336919	test: 0.1370709	best: 0.1370709 (574)	total: 25.2s	remaining: 16.8s
600:	learn: 0.1336893	test: 0.1370727	best: 0.1370709 (574)	total: 25.3s	remaining: 16.8s
601:	learn: 0.1336498	test: 0.1370674	best: 0.1370674 (601)	total: 25.3s	remaining: 16.7s
602:	learn: 0.1336395	test: 0.1370692	best: 0.1370674 (601)	total: 25.4s	remaining: 16.7s
603:	learn: 0.1336217	test: 0.1370620	best: 0.1370620 (603)	total: 25.4s	remaining: 16.7s
604:	learn: 0.1336205	test: 0.1370618	best: 0.1370618 (604)	total: 25.4s	remaining: 16.6s
605:	learn: 0.1335797	test: 0.1370552	best: 0.1370552 (605)	total: 25.5s	remaining: 16.6s
606:	learn: 0.1335512	test: 0.1370591	best: 0.1370552 (605)	total: 25.5s	remaining: 16.5s
607:	learn: 0.1335459	test: 0.1370516	best: 0.1370516 (607)	total: 25.6s	remaining: 16.5s
608:	learn: 0.1335457	test: 0.1370536	best: 0.1370516 (607)	total: 25.6s	remaining: 16.4s
609:	learn: 0.1335145	test: 0.1370683	best: 0.1370516 (607)	total: 25.6s	remaining: 16.4s
610:	learn: 0.1335053	test: 0.1370659	best: 0.1370516 (607)	total: 25.7s	remaining: 16.4s
611:	learn: 0.1334790	test: 0.1370377	best: 0.1370377 (611)	total: 25.7s	remaining: 16.3s
612:	learn: 0.1334736	test: 0.1370311	best: 0.1370311 (612)	total: 25.8s	remaining: 16.3s
613:	learn: 0.1334711	test: 0.1370350	best: 0.1370311 (612)	total: 25.8s	remaining: 16.2s
614:	learn: 0.1334502	test: 0.1370446	best: 0.1370311 (612)	total: 25.8s	remaining: 16.2s
615:	learn: 0.1334184	test: 0.1370108	best: 0.1370108 (615)	total: 25.9s	remaining: 16.1s
616:	learn: 0.1334028	test: 0.1370059	best: 0.1370059 (616)	total: 25.9s	remaining: 16.1s
617:	learn: 0.1334023	test: 0.1370062	best: 0.1370059 (616)	total: 26s	remaining: 16s
618:	learn: 0.1333975	test: 0.1370013	best: 0.1370013 (618)	total: 26.1s	remaining: 16s
619:	learn: 0.1333895	test: 0.1370049	best: 0.1370013 (618)	total: 26.1s	remaining: 16s
620:	learn: 0.1333876	test: 0.1370110	best: 0.1370013 (618)	total: 26.1s	remaining: 16s
621:	learn: 0.1333866	test: 0.1370113	best: 0.1370013 (618)	total: 26.2s	remaining: 15.9s
622:	learn: 0.1333386	test: 0.1370168	best: 0.1370013 (618)	total: 26.2s	remaining: 15.9s
623:	learn: 0.1333241	test: 0.1370118	best: 0.1370013 (618)	total: 26.3s	remaining: 15.8s
624:	learn: 0.1333213	test: 0.1370069	best: 0.1370013 (618)	total: 26.3s	remaining: 15.8s
625:	learn: 0.1332893	test: 0.1369700	best: 0.1369700 (625)	total: 26.3s	remaining: 15.7s
626:	learn: 0.1332411	test: 0.1369947	best: 0.1369700 (625)	total: 26.4s	remaining: 15.7s
627:	learn: 0.1332382	test: 0.1369933	best: 0.1369700 (625)	total: 26.4s	remaining: 15.7s
628:	learn: 0.1332260	test: 0.1369732	best: 0.1369700 (625)	total: 26.5s	remaining: 15.6s
629:	learn: 0.1332245	test: 0.1369725	best: 0.1369700 (625)	total: 26.5s	remaining: 15.6s
630:	learn: 0.1332245	test: 0.1369725	best: 0.1369700 (625)	total: 26.5s	remaining: 15.5s
631:	learn: 0.1332224	test: 0.1369725	best: 0.1369700 (625)	total: 26.6s	remaining: 15.5s
632:	learn: 0.1332224	test: 0.1369725	best: 0.1369700 (625)	total: 26.6s	remaining: 15.4s
633:	learn: 0.1332202	test: 0.1369759	best: 0.1369700 (625)	total: 26.6s	remaining: 15.4s
634:	learn: 0.1332186	test: 0.1369743	best: 0.1369700 (625)	total: 26.7s	remaining: 15.3s
635:	learn: 0.1332183	test: 0.1369750	best: 0.1369700 (625)	total: 26.7s	remaining: 15.3s
636:	learn: 0.1332164	test: 0.1369727	best: 0.1369700 (625)	total: 26.8s	remaining: 15.3s
637:	learn: 0.1332027	test: 0.1369768	best: 0.1369700 (625)	total: 26.8s	remaining: 15.2s
638:	learn: 0.1332020	test: 0.1369760	best: 0.1369700 (625)	total: 26.9s	remaining: 15.2s
639:	learn: 0.1332004	test: 0.1369796	best: 0.1369700 (625)	total: 26.9s	remaining: 15.1s
640:	learn: 0.1331937	test: 0.1369782	best: 0.1369700 (625)	total: 26.9s	remaining: 15.1s
641:	learn: 0.1331556	test: 0.1369572	best: 0.1369572 (641)	total: 27s	remaining: 15s
642:	learn: 0.1331244	test: 0.1369663	best: 0.1369572 (641)	total: 27s	remaining: 15s
643:	learn: 0.1331241	test: 0.1369670	best: 0.1369572 (641)	total: 27.1s	remaining: 15s
644:	learn: 0.1331239	test: 0.1369672	best: 0.1369572 (641)	total: 27.1s	remaining: 14.9s
645:	learn: 0.1331181	test: 0.1369663	best: 0.1369572 (641)	total: 27.2s	remaining: 14.9s
646:	learn: 0.1331143	test: 0.1369649	best: 0.1369572 (641)	total: 27.2s	remaining: 14.8s
647:	learn: 0.1331143	test: 0.1369649	best: 0.1369572 (641)	total: 27.3s	remaining: 14.8s
648:	learn: 0.1331139	test: 0.1369658	best: 0.1369572 (641)	total: 27.3s	remaining: 14.8s
649:	learn: 0.1330936	test: 0.1369602	best: 0.1369572 (641)	total: 27.3s	remaining: 14.7s
650:	learn: 0.1330787	test: 0.1369601	best: 0.1369572 (641)	total: 27.4s	remaining: 14.7s
651:	learn: 0.1330677	test: 0.1369512	best: 0.1369512 (651)	total: 27.4s	remaining: 14.6s
652:	learn: 0.1330614	test: 0.1369526	best: 0.1369512 (651)	total: 27.5s	remaining: 14.6s
653:	learn: 0.1330607	test: 0.1369519	best: 0.1369512 (651)	total: 27.5s	remaining: 14.5s
654:	learn: 0.1330589	test: 0.1369549	best: 0.1369512 (651)	total: 27.5s	remaining: 14.5s
655:	learn: 0.1330557	test: 0.1369538	best: 0.1369512 (651)	total: 27.6s	remaining: 14.5s
656:	learn: 0.1330556	test: 0.1369540	best: 0.1369512 (651)	total: 27.6s	remaining: 14.4s
657:	learn: 0.1330556	test: 0.1369540	best: 0.1369512 (651)	total: 27.6s	remaining: 14.4s
658:	learn: 0.1330545	test: 0.1369537	best: 0.1369512 (651)	total: 27.7s	remaining: 14.3s
659:	learn: 0.1330533	test: 0.1369532	best: 0.1369512 (651)	total: 27.7s	remaining: 14.3s
660:	learn: 0.1330406	test: 0.1369263	best: 0.1369263 (660)	total: 27.8s	remaining: 14.2s
661:	learn: 0.1330371	test: 0.1369254	best: 0.1369254 (661)	total: 27.8s	remaining: 14.2s
662:	learn: 0.1330162	test: 0.1369213	best: 0.1369213 (662)	total: 27.9s	remaining: 14.2s
663:	learn: 0.1330162	test: 0.1369213	best: 0.1369213 (663)	total: 27.9s	remaining: 14.1s
664:	learn: 0.1330159	test: 0.1369212	best: 0.1369212 (664)	total: 27.9s	remaining: 14.1s
665:	learn: 0.1329695	test: 0.1368754	best: 0.1368754 (665)	total: 28s	remaining: 14s
666:	learn: 0.1329493	test: 0.1368699	best: 0.1368699 (666)	total: 28s	remaining: 14s
667:	learn: 0.1329491	test: 0.1368714	best: 0.1368699 (666)	total: 28s	remaining: 13.9s
668:	learn: 0.1329404	test: 0.1368734	best: 0.1368699 (666)	total: 28.1s	remaining: 13.9s
669:	learn: 0.1329164	test: 0.1368646	best: 0.1368646 (669)	total: 28.2s	remaining: 13.9s
670:	learn: 0.1328807	test: 0.1368533	best: 0.1368533 (670)	total: 28.2s	remaining: 13.8s
671:	learn: 0.1327868	test: 0.1367565	best: 0.1367565 (671)	total: 28.3s	remaining: 13.8s
672:	learn: 0.1327868	test: 0.1367565	best: 0.1367565 (672)	total: 28.3s	remaining: 13.8s
673:	learn: 0.1327807	test: 0.1367592	best: 0.1367565 (672)	total: 28.3s	remaining: 13.7s
674:	learn: 0.1327797	test: 0.1367598	best: 0.1367565 (672)	total: 28.4s	remaining: 13.7s
675:	learn: 0.1327797	test: 0.1367598	best: 0.1367565 (672)	total: 28.4s	remaining: 13.6s
676:	learn: 0.1327767	test: 0.1367577	best: 0.1367565 (672)	total: 28.5s	remaining: 13.6s
677:	learn: 0.1327562	test: 0.1367647	best: 0.1367565 (672)	total: 28.5s	remaining: 13.5s
678:	learn: 0.1327467	test: 0.1367725	best: 0.1367565 (672)	total: 28.5s	remaining: 13.5s
679:	learn: 0.1327462	test: 0.1367722	best: 0.1367565 (672)	total: 28.6s	remaining: 13.5s
680:	learn: 0.1327250	test: 0.1367490	best: 0.1367490 (680)	total: 28.6s	remaining: 13.4s
681:	learn: 0.1327192	test: 0.1367437	best: 0.1367437 (681)	total: 28.7s	remaining: 13.4s
682:	learn: 0.1327190	test: 0.1367430	best: 0.1367430 (682)	total: 28.7s	remaining: 13.3s
683:	learn: 0.1326973	test: 0.1367304	best: 0.1367304 (683)	total: 28.7s	remaining: 13.3s
684:	learn: 0.1326868	test: 0.1367174	best: 0.1367174 (684)	total: 28.8s	remaining: 13.2s
685:	learn: 0.1326747	test: 0.1367240	best: 0.1367174 (684)	total: 28.8s	remaining: 13.2s
686:	learn: 0.1326670	test: 0.1367175	best: 0.1367174 (684)	total: 28.9s	remaining: 13.2s
687:	learn: 0.1326369	test: 0.1366974	best: 0.1366974 (687)	total: 28.9s	remaining: 13.1s
688:	learn: 0.1325795	test: 0.1366640	best: 0.1366640 (688)	total: 29s	remaining: 13.1s
689:	learn: 0.1325786	test: 0.1366654	best: 0.1366640 (688)	total: 29s	remaining: 13s
690:	learn: 0.1325759	test: 0.1366647	best: 0.1366640 (688)	total: 29s	remaining: 13s
691:	learn: 0.1325414	test: 0.1366628	best: 0.1366628 (691)	total: 29.1s	remaining: 12.9s
692:	learn: 0.1324871	test: 0.1366755	best: 0.1366628 (691)	total: 29.1s	remaining: 12.9s
693:	learn: 0.1324624	test: 0.1366623	best: 0.1366623 (693)	total: 29.2s	remaining: 12.9s
694:	learn: 0.1324604	test: 0.1366623	best: 0.1366623 (693)	total: 29.3s	remaining: 12.8s
695:	learn: 0.1324265	test: 0.1366403	best: 0.1366403 (695)	total: 29.3s	remaining: 12.8s
696:	learn: 0.1324171	test: 0.1366443	best: 0.1366403 (695)	total: 29.3s	remaining: 12.7s
697:	learn: 0.1324157	test: 0.1366429	best: 0.1366403 (695)	total: 29.4s	remaining: 12.7s
698:	learn: 0.1324155	test: 0.1366429	best: 0.1366403 (695)	total: 29.4s	remaining: 12.7s
699:	learn: 0.1324139	test: 0.1366434	best: 0.1366403 (695)	total: 29.4s	remaining: 12.6s
700:	learn: 0.1324133	test: 0.1366428	best: 0.1366403 (695)	total: 29.5s	remaining: 12.6s
701:	learn: 0.1324127	test: 0.1366429	best: 0.1366403 (695)	total: 29.5s	remaining: 12.5s
702:	learn: 0.1324009	test: 0.1366429	best: 0.1366403 (695)	total: 29.6s	remaining: 12.5s
703:	learn: 0.1323974	test: 0.1366431	best: 0.1366403 (695)	total: 29.6s	remaining: 12.4s
704:	learn: 0.1322925	test: 0.1365345	best: 0.1365345 (704)	total: 29.7s	remaining: 12.4s
705:	learn: 0.1322839	test: 0.1365408	best: 0.1365345 (704)	total: 29.7s	remaining: 12.4s
706:	learn: 0.1322731	test: 0.1365436	best: 0.1365345 (704)	total: 29.7s	remaining: 12.3s
707:	learn: 0.1322474	test: 0.1365443	best: 0.1365345 (704)	total: 29.8s	remaining: 12.3s
708:	learn: 0.1322253	test: 0.1365324	best: 0.1365324 (708)	total: 29.8s	remaining: 12.2s
709:	learn: 0.1322229	test: 0.1365330	best: 0.1365324 (708)	total: 29.8s	remaining: 12.2s
710:	learn: 0.1321728	test: 0.1365302	best: 0.1365302 (710)	total: 29.9s	remaining: 12.1s
711:	learn: 0.1321309	test: 0.1365064	best: 0.1365064 (711)	total: 29.9s	remaining: 12.1s
712:	learn: 0.1320793	test: 0.1364916	best: 0.1364916 (712)	total: 30s	remaining: 12.1s
713:	learn: 0.1320793	test: 0.1364916	best: 0.1364916 (712)	total: 30s	remaining: 12s
714:	learn: 0.1320385	test: 0.1364899	best: 0.1364899 (714)	total: 30.1s	remaining: 12s
715:	learn: 0.1320385	test: 0.1364899	best: 0.1364899 (714)	total: 30.1s	remaining: 11.9s
716:	learn: 0.1320194	test: 0.1364894	best: 0.1364894 (716)	total: 30.1s	remaining: 11.9s
717:	learn: 0.1320061	test: 0.1364930	best: 0.1364894 (716)	total: 30.2s	remaining: 11.9s
718:	learn: 0.1319670	test: 0.1364872	best: 0.1364872 (718)	total: 30.3s	remaining: 11.8s
719:	learn: 0.1319621	test: 0.1364905	best: 0.1364872 (718)	total: 30.3s	remaining: 11.8s
720:	learn: 0.1319613	test: 0.1364900	best: 0.1364872 (718)	total: 30.4s	remaining: 11.8s
721:	learn: 0.1319066	test: 0.1364946	best: 0.1364872 (718)	total: 30.4s	remaining: 11.7s
722:	learn: 0.1319046	test: 0.1364974	best: 0.1364872 (718)	total: 30.4s	remaining: 11.7s
723:	learn: 0.1319042	test: 0.1364966	best: 0.1364872 (718)	total: 30.5s	remaining: 11.6s
724:	learn: 0.1318997	test: 0.1365000	best: 0.1364872 (718)	total: 30.5s	remaining: 11.6s
725:	learn: 0.1318861	test: 0.1364896	best: 0.1364872 (718)	total: 30.6s	remaining: 11.5s
726:	learn: 0.1318710	test: 0.1364854	best: 0.1364854 (726)	total: 30.6s	remaining: 11.5s
727:	learn: 0.1318595	test: 0.1364873	best: 0.1364854 (726)	total: 30.6s	remaining: 11.4s
728:	learn: 0.1318473	test: 0.1364931	best: 0.1364854 (726)	total: 30.7s	remaining: 11.4s
729:	learn: 0.1318472	test: 0.1364934	best: 0.1364854 (726)	total: 30.7s	remaining: 11.4s
730:	learn: 0.1318452	test: 0.1364896	best: 0.1364854 (726)	total: 30.8s	remaining: 11.3s
731:	learn: 0.1318386	test: 0.1364868	best: 0.1364854 (726)	total: 30.8s	remaining: 11.3s
732:	learn: 0.1318300	test: 0.1364763	best: 0.1364763 (732)	total: 30.9s	remaining: 11.2s
733:	learn: 0.1318199	test: 0.1364847	best: 0.1364763 (732)	total: 30.9s	remaining: 11.2s
734:	learn: 0.1318136	test: 0.1364937	best: 0.1364763 (732)	total: 30.9s	remaining: 11.2s
735:	learn: 0.1317913	test: 0.1364836	best: 0.1364763 (732)	total: 31s	remaining: 11.1s
736:	learn: 0.1317853	test: 0.1364800	best: 0.1364763 (732)	total: 31s	remaining: 11.1s
737:	learn: 0.1317846	test: 0.1364773	best: 0.1364763 (732)	total: 31.1s	remaining: 11s
738:	learn: 0.1317791	test: 0.1364682	best: 0.1364682 (738)	total: 31.1s	remaining: 11s
739:	learn: 0.1316830	test: 0.1363857	best: 0.1363857 (739)	total: 31.1s	remaining: 10.9s
740:	learn: 0.1316253	test: 0.1363162	best: 0.1363162 (740)	total: 31.2s	remaining: 10.9s
741:	learn: 0.1316244	test: 0.1363167	best: 0.1363162 (740)	total: 31.2s	remaining: 10.9s
742:	learn: 0.1315916	test: 0.1363085	best: 0.1363085 (742)	total: 31.3s	remaining: 10.8s
743:	learn: 0.1315794	test: 0.1363134	best: 0.1363085 (742)	total: 31.3s	remaining: 10.8s
744:	learn: 0.1315750	test: 0.1363182	best: 0.1363085 (742)	total: 31.4s	remaining: 10.7s
745:	learn: 0.1315730	test: 0.1363228	best: 0.1363085 (742)	total: 31.4s	remaining: 10.7s
746:	learn: 0.1315115	test: 0.1363145	best: 0.1363085 (742)	total: 31.5s	remaining: 10.7s
747:	learn: 0.1315078	test: 0.1363180	best: 0.1363085 (742)	total: 31.5s	remaining: 10.6s
748:	learn: 0.1315070	test: 0.1363183	best: 0.1363085 (742)	total: 31.6s	remaining: 10.6s
749:	learn: 0.1314958	test: 0.1363242	best: 0.1363085 (742)	total: 31.6s	remaining: 10.5s
750:	learn: 0.1314944	test: 0.1363260	best: 0.1363085 (742)	total: 31.6s	remaining: 10.5s
751:	learn: 0.1314795	test: 0.1363247	best: 0.1363085 (742)	total: 31.7s	remaining: 10.4s
752:	learn: 0.1314717	test: 0.1363122	best: 0.1363085 (742)	total: 31.7s	remaining: 10.4s
753:	learn: 0.1314670	test: 0.1363210	best: 0.1363085 (742)	total: 31.7s	remaining: 10.4s
754:	learn: 0.1314506	test: 0.1363223	best: 0.1363085 (742)	total: 31.8s	remaining: 10.3s
755:	learn: 0.1314494	test: 0.1363231	best: 0.1363085 (742)	total: 31.8s	remaining: 10.3s
756:	learn: 0.1314477	test: 0.1363239	best: 0.1363085 (742)	total: 31.9s	remaining: 10.2s
757:	learn: 0.1314469	test: 0.1363248	best: 0.1363085 (742)	total: 31.9s	remaining: 10.2s
758:	learn: 0.1314284	test: 0.1363340	best: 0.1363085 (742)	total: 31.9s	remaining: 10.1s
759:	learn: 0.1314274	test: 0.1363331	best: 0.1363085 (742)	total: 32s	remaining: 10.1s
760:	learn: 0.1314260	test: 0.1363292	best: 0.1363085 (742)	total: 32s	remaining: 10.1s
761:	learn: 0.1314059	test: 0.1363297	best: 0.1363085 (742)	total: 32.1s	remaining: 10s
762:	learn: 0.1314032	test: 0.1363299	best: 0.1363085 (742)	total: 32.1s	remaining: 9.97s
763:	learn: 0.1313999	test: 0.1363325	best: 0.1363085 (742)	total: 32.1s	remaining: 9.93s
764:	learn: 0.1313961	test: 0.1363268	best: 0.1363085 (742)	total: 32.2s	remaining: 9.88s
765:	learn: 0.1313936	test: 0.1363260	best: 0.1363085 (742)	total: 32.2s	remaining: 9.84s
766:	learn: 0.1313927	test: 0.1363264	best: 0.1363085 (742)	total: 32.3s	remaining: 9.8s
767:	learn: 0.1313902	test: 0.1363258	best: 0.1363085 (742)	total: 32.3s	remaining: 9.76s
768:	learn: 0.1313769	test: 0.1363240	best: 0.1363085 (742)	total: 32.3s	remaining: 9.72s
769:	learn: 0.1313655	test: 0.1363229	best: 0.1363085 (742)	total: 32.4s	remaining: 9.69s
770:	learn: 0.1313568	test: 0.1363137	best: 0.1363085 (742)	total: 32.5s	remaining: 9.64s
771:	learn: 0.1313436	test: 0.1363048	best: 0.1363048 (771)	total: 32.5s	remaining: 9.6s
772:	learn: 0.1313323	test: 0.1363021	best: 0.1363021 (772)	total: 32.6s	remaining: 9.56s
773:	learn: 0.1313260	test: 0.1363022	best: 0.1363021 (772)	total: 32.6s	remaining: 9.52s
774:	learn: 0.1313256	test: 0.1363035	best: 0.1363021 (772)	total: 32.6s	remaining: 9.47s
775:	learn: 0.1313255	test: 0.1363038	best: 0.1363021 (772)	total: 32.7s	remaining: 9.43s
776:	learn: 0.1313249	test: 0.1363045	best: 0.1363021 (772)	total: 32.7s	remaining: 9.39s
777:	learn: 0.1313181	test: 0.1362960	best: 0.1362960 (777)	total: 32.8s	remaining: 9.35s
778:	learn: 0.1312820	test: 0.1362835	best: 0.1362835 (778)	total: 32.8s	remaining: 9.3s
779:	learn: 0.1312819	test: 0.1362845	best: 0.1362835 (778)	total: 32.8s	remaining: 9.26s
780:	learn: 0.1312217	test: 0.1362592	best: 0.1362592 (780)	total: 32.9s	remaining: 9.22s
781:	learn: 0.1312204	test: 0.1362591	best: 0.1362591 (781)	total: 32.9s	remaining: 9.18s
782:	learn: 0.1312124	test: 0.1362619	best: 0.1362591 (781)	total: 33s	remaining: 9.13s
783:	learn: 0.1312014	test: 0.1362465	best: 0.1362465 (783)	total: 33s	remaining: 9.09s
784:	learn: 0.1311994	test: 0.1362463	best: 0.1362463 (784)	total: 33s	remaining: 9.05s
785:	learn: 0.1311958	test: 0.1362503	best: 0.1362463 (784)	total: 33.1s	remaining: 9.01s
786:	learn: 0.1311820	test: 0.1362549	best: 0.1362463 (784)	total: 33.1s	remaining: 8.96s
787:	learn: 0.1311717	test: 0.1362582	best: 0.1362463 (784)	total: 33.2s	remaining: 8.92s
788:	learn: 0.1311541	test: 0.1362631	best: 0.1362463 (784)	total: 33.2s	remaining: 8.88s
789:	learn: 0.1311449	test: 0.1362656	best: 0.1362463 (784)	total: 33.2s	remaining: 8.84s
790:	learn: 0.1311273	test: 0.1362792	best: 0.1362463 (784)	total: 33.3s	remaining: 8.79s
791:	learn: 0.1311205	test: 0.1362721	best: 0.1362463 (784)	total: 33.3s	remaining: 8.75s
792:	learn: 0.1310934	test: 0.1362618	best: 0.1362463 (784)	total: 33.4s	remaining: 8.71s
793:	learn: 0.1310876	test: 0.1362517	best: 0.1362463 (784)	total: 33.4s	remaining: 8.67s
794:	learn: 0.1310806	test: 0.1362552	best: 0.1362463 (784)	total: 33.5s	remaining: 8.64s
795:	learn: 0.1310765	test: 0.1362603	best: 0.1362463 (784)	total: 33.6s	remaining: 8.6s
796:	learn: 0.1310564	test: 0.1362535	best: 0.1362463 (784)	total: 33.6s	remaining: 8.55s
797:	learn: 0.1309984	test: 0.1362085	best: 0.1362085 (797)	total: 33.6s	remaining: 8.51s
798:	learn: 0.1309925	test: 0.1362120	best: 0.1362085 (797)	total: 33.7s	remaining: 8.47s
799:	learn: 0.1309698	test: 0.1362103	best: 0.1362085 (797)	total: 33.7s	remaining: 8.43s
800:	learn: 0.1309604	test: 0.1362108	best: 0.1362085 (797)	total: 33.8s	remaining: 8.39s
801:	learn: 0.1309597	test: 0.1362115	best: 0.1362085 (797)	total: 33.8s	remaining: 8.35s
802:	learn: 0.1309593	test: 0.1362123	best: 0.1362085 (797)	total: 33.8s	remaining: 8.3s
803:	learn: 0.1309498	test: 0.1362081	best: 0.1362081 (803)	total: 33.9s	remaining: 8.26s
804:	learn: 0.1309454	test: 0.1362118	best: 0.1362081 (803)	total: 33.9s	remaining: 8.22s
805:	learn: 0.1309383	test: 0.1362210	best: 0.1362081 (803)	total: 34s	remaining: 8.18s
806:	learn: 0.1309185	test: 0.1361993	best: 0.1361993 (806)	total: 34s	remaining: 8.13s
807:	learn: 0.1309172	test: 0.1361990	best: 0.1361990 (807)	total: 34s	remaining: 8.09s
808:	learn: 0.1309172	test: 0.1361990	best: 0.1361990 (808)	total: 34.1s	remaining: 8.04s
809:	learn: 0.1309099	test: 0.1361981	best: 0.1361981 (809)	total: 34.1s	remaining: 8s
810:	learn: 0.1308922	test: 0.1362105	best: 0.1361981 (809)	total: 34.2s	remaining: 7.96s
811:	learn: 0.1308877	test: 0.1362064	best: 0.1361981 (809)	total: 34.2s	remaining: 7.92s
812:	learn: 0.1308705	test: 0.1362130	best: 0.1361981 (809)	total: 34.2s	remaining: 7.88s
813:	learn: 0.1308601	test: 0.1362112	best: 0.1361981 (809)	total: 34.3s	remaining: 7.83s
814:	learn: 0.1308404	test: 0.1362138	best: 0.1361981 (809)	total: 34.3s	remaining: 7.79s
815:	learn: 0.1308395	test: 0.1362123	best: 0.1361981 (809)	total: 34.4s	remaining: 7.75s
816:	learn: 0.1308379	test: 0.1362148	best: 0.1361981 (809)	total: 34.4s	remaining: 7.7s
817:	learn: 0.1308378	test: 0.1362149	best: 0.1361981 (809)	total: 34.4s	remaining: 7.66s
818:	learn: 0.1308349	test: 0.1362145	best: 0.1361981 (809)	total: 34.5s	remaining: 7.62s
819:	learn: 0.1308287	test: 0.1362038	best: 0.1361981 (809)	total: 34.6s	remaining: 7.59s
820:	learn: 0.1308287	test: 0.1362038	best: 0.1361981 (809)	total: 34.6s	remaining: 7.55s
821:	learn: 0.1308282	test: 0.1362063	best: 0.1361981 (809)	total: 34.7s	remaining: 7.5s
822:	learn: 0.1307576	test: 0.1361573	best: 0.1361573 (822)	total: 34.7s	remaining: 7.46s
823:	learn: 0.1307576	test: 0.1361573	best: 0.1361573 (823)	total: 34.7s	remaining: 7.42s
824:	learn: 0.1307348	test: 0.1361485	best: 0.1361485 (824)	total: 34.8s	remaining: 7.37s
825:	learn: 0.1306953	test: 0.1361410	best: 0.1361410 (825)	total: 34.8s	remaining: 7.33s
826:	learn: 0.1306611	test: 0.1361336	best: 0.1361336 (826)	total: 34.8s	remaining: 7.29s
827:	learn: 0.1306606	test: 0.1361327	best: 0.1361327 (827)	total: 34.9s	remaining: 7.25s
828:	learn: 0.1306595	test: 0.1361370	best: 0.1361327 (827)	total: 34.9s	remaining: 7.21s
829:	learn: 0.1306401	test: 0.1361554	best: 0.1361327 (827)	total: 35s	remaining: 7.16s
830:	learn: 0.1306393	test: 0.1361578	best: 0.1361327 (827)	total: 35s	remaining: 7.12s
831:	learn: 0.1306366	test: 0.1361583	best: 0.1361327 (827)	total: 35s	remaining: 7.08s
832:	learn: 0.1306359	test: 0.1361590	best: 0.1361327 (827)	total: 35.1s	remaining: 7.03s
833:	learn: 0.1306308	test: 0.1361548	best: 0.1361327 (827)	total: 35.1s	remaining: 6.99s
834:	learn: 0.1306168	test: 0.1361440	best: 0.1361327 (827)	total: 35.2s	remaining: 6.95s
835:	learn: 0.1306139	test: 0.1361369	best: 0.1361327 (827)	total: 35.2s	remaining: 6.91s
836:	learn: 0.1306139	test: 0.1361369	best: 0.1361327 (827)	total: 35.3s	remaining: 6.86s
837:	learn: 0.1306135	test: 0.1361379	best: 0.1361327 (827)	total: 35.3s	remaining: 6.82s
838:	learn: 0.1306126	test: 0.1361395	best: 0.1361327 (827)	total: 35.3s	remaining: 6.78s
839:	learn: 0.1306126	test: 0.1361394	best: 0.1361327 (827)	total: 35.4s	remaining: 6.73s
840:	learn: 0.1306078	test: 0.1361395	best: 0.1361327 (827)	total: 35.4s	remaining: 6.69s
841:	learn: 0.1305978	test: 0.1361471	best: 0.1361327 (827)	total: 35.4s	remaining: 6.65s
842:	learn: 0.1305978	test: 0.1361471	best: 0.1361327 (827)	total: 35.5s	remaining: 6.61s
843:	learn: 0.1305883	test: 0.1361463	best: 0.1361327 (827)	total: 35.5s	remaining: 6.56s
844:	learn: 0.1305596	test: 0.1361308	best: 0.1361308 (844)	total: 35.6s	remaining: 6.53s
845:	learn: 0.1305590	test: 0.1361311	best: 0.1361308 (844)	total: 35.7s	remaining: 6.5s
846:	learn: 0.1305540	test: 0.1361252	best: 0.1361252 (846)	total: 35.7s	remaining: 6.45s
847:	learn: 0.1305540	test: 0.1361252	best: 0.1361252 (847)	total: 35.8s	remaining: 6.41s
848:	learn: 0.1305538	test: 0.1361262	best: 0.1361252 (847)	total: 35.8s	remaining: 6.37s
849:	learn: 0.1305533	test: 0.1361261	best: 0.1361252 (847)	total: 35.9s	remaining: 6.33s
850:	learn: 0.1305521	test: 0.1361275	best: 0.1361252 (847)	total: 35.9s	remaining: 6.28s
851:	learn: 0.1305521	test: 0.1361275	best: 0.1361252 (847)	total: 35.9s	remaining: 6.24s
852:	learn: 0.1305274	test: 0.1361042	best: 0.1361042 (852)	total: 36s	remaining: 6.2s
853:	learn: 0.1305023	test: 0.1360859	best: 0.1360859 (853)	total: 36s	remaining: 6.16s
854:	learn: 0.1305023	test: 0.1360859	best: 0.1360859 (853)	total: 36s	remaining: 6.11s
855:	learn: 0.1304994	test: 0.1360862	best: 0.1360859 (853)	total: 36.1s	remaining: 6.07s
856:	learn: 0.1304994	test: 0.1360862	best: 0.1360859 (853)	total: 36.1s	remaining: 6.02s
857:	learn: 0.1304987	test: 0.1360871	best: 0.1360859 (853)	total: 36.1s	remaining: 5.98s
858:	learn: 0.1304961	test: 0.1360880	best: 0.1360859 (853)	total: 36.2s	remaining: 5.94s
859:	learn: 0.1304895	test: 0.1360968	best: 0.1360859 (853)	total: 36.2s	remaining: 5.9s
860:	learn: 0.1304837	test: 0.1361061	best: 0.1360859 (853)	total: 36.3s	remaining: 5.85s
861:	learn: 0.1304661	test: 0.1360815	best: 0.1360815 (861)	total: 36.3s	remaining: 5.81s
862:	learn: 0.1304529	test: 0.1360830	best: 0.1360815 (861)	total: 36.3s	remaining: 5.77s
863:	learn: 0.1304296	test: 0.1360716	best: 0.1360716 (863)	total: 36.4s	remaining: 5.72s
864:	learn: 0.1304273	test: 0.1360715	best: 0.1360715 (864)	total: 36.4s	remaining: 5.68s
865:	learn: 0.1304132	test: 0.1360710	best: 0.1360710 (865)	total: 36.5s	remaining: 5.64s
866:	learn: 0.1304125	test: 0.1360693	best: 0.1360693 (866)	total: 36.5s	remaining: 5.6s
867:	learn: 0.1304085	test: 0.1360683	best: 0.1360683 (867)	total: 36.5s	remaining: 5.56s
868:	learn: 0.1304084	test: 0.1360683	best: 0.1360683 (868)	total: 36.6s	remaining: 5.51s
869:	learn: 0.1304077	test: 0.1360700	best: 0.1360683 (868)	total: 36.6s	remaining: 5.47s
870:	learn: 0.1304050	test: 0.1360675	best: 0.1360675 (870)	total: 36.7s	remaining: 5.43s
871:	learn: 0.1304018	test: 0.1360742	best: 0.1360675 (870)	total: 36.8s	remaining: 5.4s
872:	learn: 0.1304014	test: 0.1360748	best: 0.1360675 (870)	total: 36.8s	remaining: 5.36s
873:	learn: 0.1304003	test: 0.1360752	best: 0.1360675 (870)	total: 36.9s	remaining: 5.31s
874:	learn: 0.1303676	test: 0.1360647	best: 0.1360647 (874)	total: 36.9s	remaining: 5.27s
875:	learn: 0.1303656	test: 0.1360617	best: 0.1360617 (875)	total: 36.9s	remaining: 5.23s
876:	learn: 0.1303520	test: 0.1360507	best: 0.1360507 (876)	total: 37s	remaining: 5.18s
877:	learn: 0.1303398	test: 0.1360517	best: 0.1360507 (876)	total: 37s	remaining: 5.14s
878:	learn: 0.1303193	test: 0.1360553	best: 0.1360507 (876)	total: 37.1s	remaining: 5.1s
879:	learn: 0.1303188	test: 0.1360549	best: 0.1360507 (876)	total: 37.1s	remaining: 5.06s
880:	learn: 0.1302986	test: 0.1360542	best: 0.1360507 (876)	total: 37.1s	remaining: 5.02s
881:	learn: 0.1302985	test: 0.1360552	best: 0.1360507 (876)	total: 37.2s	remaining: 4.97s
882:	learn: 0.1302870	test: 0.1360543	best: 0.1360507 (876)	total: 37.2s	remaining: 4.93s
883:	learn: 0.1302684	test: 0.1360505	best: 0.1360505 (883)	total: 37.3s	remaining: 4.89s
884:	learn: 0.1302663	test: 0.1360521	best: 0.1360505 (883)	total: 37.3s	remaining: 4.85s
885:	learn: 0.1302648	test: 0.1360539	best: 0.1360505 (883)	total: 37.3s	remaining: 4.8s
886:	learn: 0.1302643	test: 0.1360566	best: 0.1360505 (883)	total: 37.4s	remaining: 4.76s
887:	learn: 0.1302639	test: 0.1360552	best: 0.1360505 (883)	total: 37.4s	remaining: 4.72s
888:	learn: 0.1302550	test: 0.1360353	best: 0.1360353 (888)	total: 37.5s	remaining: 4.68s
889:	learn: 0.1302539	test: 0.1360338	best: 0.1360338 (889)	total: 37.5s	remaining: 4.63s
890:	learn: 0.1302538	test: 0.1360337	best: 0.1360337 (890)	total: 37.5s	remaining: 4.59s
891:	learn: 0.1302537	test: 0.1360332	best: 0.1360332 (891)	total: 37.6s	remaining: 4.55s
892:	learn: 0.1302523	test: 0.1360293	best: 0.1360293 (892)	total: 37.6s	remaining: 4.51s
893:	learn: 0.1302459	test: 0.1360328	best: 0.1360293 (892)	total: 37.7s	remaining: 4.46s
894:	learn: 0.1302454	test: 0.1360333	best: 0.1360293 (892)	total: 37.7s	remaining: 4.42s
895:	learn: 0.1302327	test: 0.1360293	best: 0.1360293 (892)	total: 37.7s	remaining: 4.38s
896:	learn: 0.1302283	test: 0.1360295	best: 0.1360293 (892)	total: 37.8s	remaining: 4.34s
897:	learn: 0.1302103	test: 0.1360134	best: 0.1360134 (897)	total: 37.9s	remaining: 4.3s
898:	learn: 0.1302040	test: 0.1360207	best: 0.1360134 (897)	total: 37.9s	remaining: 4.26s
899:	learn: 0.1301920	test: 0.1360185	best: 0.1360134 (897)	total: 38s	remaining: 4.22s
900:	learn: 0.1301709	test: 0.1359964	best: 0.1359964 (900)	total: 38s	remaining: 4.18s
901:	learn: 0.1301680	test: 0.1359956	best: 0.1359956 (901)	total: 38.1s	remaining: 4.13s
902:	learn: 0.1301680	test: 0.1359956	best: 0.1359956 (902)	total: 38.1s	remaining: 4.09s
903:	learn: 0.1301673	test: 0.1359954	best: 0.1359954 (903)	total: 38.1s	remaining: 4.05s
904:	learn: 0.1301649	test: 0.1359918	best: 0.1359918 (904)	total: 38.2s	remaining: 4.01s
905:	learn: 0.1301639	test: 0.1359913	best: 0.1359913 (905)	total: 38.2s	remaining: 3.97s
906:	learn: 0.1301628	test: 0.1359933	best: 0.1359913 (905)	total: 38.3s	remaining: 3.92s
907:	learn: 0.1301541	test: 0.1360020	best: 0.1359913 (905)	total: 38.3s	remaining: 3.88s
908:	learn: 0.1301534	test: 0.1360022	best: 0.1359913 (905)	total: 38.3s	remaining: 3.84s
909:	learn: 0.1301534	test: 0.1360022	best: 0.1359913 (905)	total: 38.4s	remaining: 3.79s
910:	learn: 0.1301385	test: 0.1359915	best: 0.1359913 (905)	total: 38.4s	remaining: 3.75s
911:	learn: 0.1301382	test: 0.1359920	best: 0.1359913 (905)	total: 38.5s	remaining: 3.71s
912:	learn: 0.1301324	test: 0.1359882	best: 0.1359882 (912)	total: 38.5s	remaining: 3.67s
913:	learn: 0.1301089	test: 0.1359693	best: 0.1359693 (913)	total: 38.5s	remaining: 3.63s
914:	learn: 0.1301044	test: 0.1359730	best: 0.1359693 (913)	total: 38.6s	remaining: 3.58s
915:	learn: 0.1300864	test: 0.1359923	best: 0.1359693 (913)	total: 38.6s	remaining: 3.54s
916:	learn: 0.1300852	test: 0.1359947	best: 0.1359693 (913)	total: 38.7s	remaining: 3.5s
917:	learn: 0.1300799	test: 0.1359997	best: 0.1359693 (913)	total: 38.7s	remaining: 3.46s
918:	learn: 0.1300781	test: 0.1360056	best: 0.1359693 (913)	total: 38.7s	remaining: 3.41s
919:	learn: 0.1300780	test: 0.1360055	best: 0.1359693 (913)	total: 38.8s	remaining: 3.37s
920:	learn: 0.1300765	test: 0.1360019	best: 0.1359693 (913)	total: 38.8s	remaining: 3.33s
921:	learn: 0.1300713	test: 0.1359942	best: 0.1359693 (913)	total: 38.9s	remaining: 3.29s
922:	learn: 0.1300508	test: 0.1359944	best: 0.1359693 (913)	total: 38.9s	remaining: 3.25s
923:	learn: 0.1300472	test: 0.1359895	best: 0.1359693 (913)	total: 39s	remaining: 3.21s
924:	learn: 0.1300442	test: 0.1359900	best: 0.1359693 (913)	total: 39s	remaining: 3.16s
925:	learn: 0.1300440	test: 0.1359910	best: 0.1359693 (913)	total: 39.1s	remaining: 3.12s
926:	learn: 0.1300221	test: 0.1359796	best: 0.1359693 (913)	total: 39.1s	remaining: 3.08s
927:	learn: 0.1300115	test: 0.1359906	best: 0.1359693 (913)	total: 39.2s	remaining: 3.04s
928:	learn: 0.1300109	test: 0.1359913	best: 0.1359693 (913)	total: 39.2s	remaining: 3s
929:	learn: 0.1299892	test: 0.1359833	best: 0.1359693 (913)	total: 39.2s	remaining: 2.95s
930:	learn: 0.1299887	test: 0.1359837	best: 0.1359693 (913)	total: 39.3s	remaining: 2.91s
931:	learn: 0.1299886	test: 0.1359838	best: 0.1359693 (913)	total: 39.3s	remaining: 2.87s
932:	learn: 0.1299850	test: 0.1359831	best: 0.1359693 (913)	total: 39.4s	remaining: 2.83s
933:	learn: 0.1299847	test: 0.1359828	best: 0.1359693 (913)	total: 39.4s	remaining: 2.78s
934:	learn: 0.1299579	test: 0.1359563	best: 0.1359563 (934)	total: 39.4s	remaining: 2.74s
935:	learn: 0.1299453	test: 0.1359549	best: 0.1359549 (935)	total: 39.5s	remaining: 2.7s
936:	learn: 0.1299102	test: 0.1359140	best: 0.1359140 (936)	total: 39.5s	remaining: 2.66s
937:	learn: 0.1299092	test: 0.1359157	best: 0.1359140 (936)	total: 39.6s	remaining: 2.61s
938:	learn: 0.1299055	test: 0.1359090	best: 0.1359090 (938)	total: 39.6s	remaining: 2.57s
939:	learn: 0.1299049	test: 0.1359095	best: 0.1359090 (938)	total: 39.6s	remaining: 2.53s
940:	learn: 0.1299029	test: 0.1359096	best: 0.1359090 (938)	total: 39.7s	remaining: 2.49s
941:	learn: 0.1299029	test: 0.1359096	best: 0.1359090 (938)	total: 39.7s	remaining: 2.44s
942:	learn: 0.1299006	test: 0.1359084	best: 0.1359084 (942)	total: 39.8s	remaining: 2.4s
943:	learn: 0.1298917	test: 0.1359065	best: 0.1359065 (943)	total: 39.8s	remaining: 2.36s
944:	learn: 0.1298779	test: 0.1359070	best: 0.1359065 (943)	total: 39.8s	remaining: 2.32s
945:	learn: 0.1298752	test: 0.1359010	best: 0.1359010 (945)	total: 39.9s	remaining: 2.28s
946:	learn: 0.1298646	test: 0.1358950	best: 0.1358950 (946)	total: 39.9s	remaining: 2.23s
947:	learn: 0.1298614	test: 0.1358940	best: 0.1358940 (947)	total: 40s	remaining: 2.19s
948:	learn: 0.1298236	test: 0.1358703	best: 0.1358703 (948)	total: 40.1s	remaining: 2.15s
949:	learn: 0.1298196	test: 0.1358659	best: 0.1358659 (949)	total: 40.1s	remaining: 2.11s
950:	learn: 0.1298128	test: 0.1358707	best: 0.1358659 (949)	total: 40.2s	remaining: 2.07s
951:	learn: 0.1298113	test: 0.1358720	best: 0.1358659 (949)	total: 40.2s	remaining: 2.03s
952:	learn: 0.1297640	test: 0.1358979	best: 0.1358659 (949)	total: 40.2s	remaining: 1.98s
953:	learn: 0.1297558	test: 0.1358977	best: 0.1358659 (949)	total: 40.3s	remaining: 1.94s
954:	learn: 0.1297548	test: 0.1358968	best: 0.1358659 (949)	total: 40.3s	remaining: 1.9s
955:	learn: 0.1297446	test: 0.1358907	best: 0.1358659 (949)	total: 40.4s	remaining: 1.86s
956:	learn: 0.1297430	test: 0.1358883	best: 0.1358659 (949)	total: 40.4s	remaining: 1.81s
957:	learn: 0.1297396	test: 0.1358966	best: 0.1358659 (949)	total: 40.4s	remaining: 1.77s
958:	learn: 0.1297389	test: 0.1358975	best: 0.1358659 (949)	total: 40.5s	remaining: 1.73s
959:	learn: 0.1297361	test: 0.1358949	best: 0.1358659 (949)	total: 40.5s	remaining: 1.69s
960:	learn: 0.1297232	test: 0.1358819	best: 0.1358659 (949)	total: 40.6s	remaining: 1.65s
961:	learn: 0.1297027	test: 0.1358743	best: 0.1358659 (949)	total: 40.6s	remaining: 1.6s
962:	learn: 0.1296893	test: 0.1358882	best: 0.1358659 (949)	total: 40.7s	remaining: 1.56s
963:	learn: 0.1296758	test: 0.1358656	best: 0.1358656 (963)	total: 40.7s	remaining: 1.52s
964:	learn: 0.1296717	test: 0.1358667	best: 0.1358656 (963)	total: 40.7s	remaining: 1.48s
965:	learn: 0.1296715	test: 0.1358672	best: 0.1358656 (963)	total: 40.8s	remaining: 1.43s
966:	learn: 0.1296637	test: 0.1358661	best: 0.1358656 (963)	total: 40.8s	remaining: 1.39s
967:	learn: 0.1296619	test: 0.1358659	best: 0.1358656 (963)	total: 40.9s	remaining: 1.35s
968:	learn: 0.1296542	test: 0.1358658	best: 0.1358656 (963)	total: 40.9s	remaining: 1.31s
969:	learn: 0.1296539	test: 0.1358655	best: 0.1358655 (969)	total: 40.9s	remaining: 1.27s
970:	learn: 0.1296469	test: 0.1358721	best: 0.1358655 (969)	total: 41s	remaining: 1.22s
971:	learn: 0.1296206	test: 0.1358449	best: 0.1358449 (971)	total: 41.1s	remaining: 1.18s
972:	learn: 0.1296166	test: 0.1358356	best: 0.1358356 (972)	total: 41.1s	remaining: 1.14s
973:	learn: 0.1296149	test: 0.1358441	best: 0.1358356 (972)	total: 41.2s	remaining: 1.1s
974:	learn: 0.1296040	test: 0.1358391	best: 0.1358356 (972)	total: 41.2s	remaining: 1.06s
975:	learn: 0.1295878	test: 0.1358503	best: 0.1358356 (972)	total: 41.3s	remaining: 1.01s
976:	learn: 0.1295848	test: 0.1358532	best: 0.1358356 (972)	total: 41.3s	remaining: 972ms
977:	learn: 0.1295841	test: 0.1358545	best: 0.1358356 (972)	total: 41.3s	remaining: 930ms
978:	learn: 0.1295741	test: 0.1358467	best: 0.1358356 (972)	total: 41.4s	remaining: 888ms
979:	learn: 0.1295720	test: 0.1358507	best: 0.1358356 (972)	total: 41.4s	remaining: 845ms
980:	learn: 0.1295660	test: 0.1358544	best: 0.1358356 (972)	total: 41.5s	remaining: 803ms
981:	learn: 0.1295656	test: 0.1358550	best: 0.1358356 (972)	total: 41.5s	remaining: 761ms
982:	learn: 0.1295630	test: 0.1358542	best: 0.1358356 (972)	total: 41.5s	remaining: 718ms
983:	learn: 0.1295629	test: 0.1358552	best: 0.1358356 (972)	total: 41.6s	remaining: 676ms
984:	learn: 0.1295133	test: 0.1358294	best: 0.1358294 (984)	total: 41.6s	remaining: 634ms
985:	learn: 0.1295015	test: 0.1358246	best: 0.1358246 (985)	total: 41.6s	remaining: 591ms
986:	learn: 0.1295014	test: 0.1358245	best: 0.1358245 (986)	total: 41.7s	remaining: 549ms
987:	learn: 0.1295011	test: 0.1358252	best: 0.1358245 (986)	total: 41.7s	remaining: 507ms
988:	learn: 0.1294962	test: 0.1358245	best: 0.1358245 (986)	total: 41.8s	remaining: 465ms
989:	learn: 0.1294961	test: 0.1358264	best: 0.1358245 (986)	total: 41.8s	remaining: 422ms
990:	learn: 0.1293744	test: 0.1356882	best: 0.1356882 (990)	total: 41.9s	remaining: 380ms
991:	learn: 0.1293741	test: 0.1356893	best: 0.1356882 (990)	total: 41.9s	remaining: 338ms
992:	learn: 0.1293606	test: 0.1356839	best: 0.1356839 (992)	total: 41.9s	remaining: 296ms
993:	learn: 0.1293545	test: 0.1356781	best: 0.1356781 (993)	total: 42s	remaining: 253ms
994:	learn: 0.1293513	test: 0.1356812	best: 0.1356781 (993)	total: 42s	remaining: 211ms
995:	learn: 0.1293147	test: 0.1356702	best: 0.1356702 (995)	total: 42.1s	remaining: 169ms
996:	learn: 0.1293079	test: 0.1356781	best: 0.1356702 (995)	total: 42.1s	remaining: 127ms
997:	learn: 0.1292913	test: 0.1356605	best: 0.1356605 (997)	total: 42.2s	remaining: 84.6ms
998:	learn: 0.1292719	test: 0.1356571	best: 0.1356571 (998)	total: 42.2s	remaining: 42.3ms
999:	learn: 0.1292470	test: 0.1356399	best: 0.1356399 (999)	total: 42.3s	remaining: 0us

bestTest = 0.1356399446
bestIteration = 999

Out[183]:
<catboost.core.CatBoostRegressor at 0x29e1d7a4b38>

6 Stacking

In [190]:
#X, y = df.drop(['CreditRiskScore'], axis = 1), df['CreditRiskScore']

#X_train, X_test, y_train, y_test  = train_test_split(X, y, test_size = 0.2, random_state = 0)
y= df1['CreditRiskScore']
X=df1.drop(['CreditRiskScore'], axis = 1)
from sklearn.model_selection import train_test_split  
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)  
In [191]:
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
#sc = StandardScaler()
x_train = sc.fit_transform(X_train)
x_test = sc.transform(X_test)
In [196]:
import numpy as np
import pandas as pd
# data precession
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold
# model
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor




class Ensemble(object):
    def __init__(self, n_splits, stacker, base_models):
        self.n_splits = n_splits
        self.stacker = stacker
        self.base_models = base_models

    def fit_predict(self, X, y, T):
        X = np.array(X)
        y = np.array(y)
        T = np.array(T)

        folds = list(KFold(n_splits=self.n_splits, shuffle=True, random_state=2016).split(X, y))

        S_train = np.zeros((X.shape[0], len(self.base_models)))
        S_test = np.zeros((T.shape[0], len(self.base_models)))
        for i, clf in enumerate(self.base_models):

            S_test_i = np.zeros((T.shape[0], self.n_splits))

            for j, (train_idx, test_idx) in enumerate(folds):
                X_train = X[train_idx]
                y_train = y[train_idx]
                X_holdout = X[test_idx]
                y_holdout = y[test_idx]
                print ("Fit Model %d fold %d" % (i, j))
                clf.fit(X_train, y_train)
                y_pred = clf.predict(X_holdout)[:]                

                S_train[test_idx, i] = y_pred
                S_test_i[:, j] = clf.predict(T)[:]
            S_test[:, i] = S_test_i.mean(axis=1)

        # results = cross_val_score(self.stacker, S_train, y, cv=5, scoring='r2')
        # print("Stacker score: %.4f (%.4f)" % (results.mean(), results.std()))
        # exit()

        self.stacker.fit(S_train, y)
        res = self.stacker.predict(S_test)[:]
        return res

# rf params
rf_params = {}
rf_params['n_estimators'] = 1000
rf_params['max_depth'] = 8
rf_params['min_samples_split'] = 100
rf_params['min_samples_leaf'] = 30

# xgb params
xgb_params = {}
xgb_params['n_estimators'] = 500
xgb_params['min_child_weight'] = 12
xgb_params['learning_rate'] = 0.12
xgb_params['max_depth'] = 6
xgb_params['subsample'] = 0.77
xgb_params['reg_lambda'] = 0.8
xgb_params['reg_alpha'] = 0.4
xgb_params['base_score'] = 0
#xgb_params['seed'] = 400
xgb_params['silent'] = 1


# lgb params
lgb_params = {}
lgb_params['n_estimators'] = 450
lgb_params['max_bin'] = 8
lgb_params['learning_rate'] = 0.037 # shrinkage_rate
lgb_params['metric'] = 'l1'          # or 'mae'
lgb_params['sub_feature'] = 0.35    
lgb_params['bagging_fraction'] = 0.85 # sub_row
lgb_params['bagging_freq'] = 40
lgb_params['num_leaves'] = 512        # num_leaf
lgb_params['min_data'] = 500         # min_data_in_leaf
lgb_params['min_hessian'] = 0.05     # min_sum_hessian_in_leaf
lgb_params['verbose'] = 0
lgb_params['feature_fraction_seed'] = 2
lgb_params['bagging_seed'] = 3


# XGB model
xgb_model = XGBRegressor(**xgb_params)

# lgb model
lgb_model = LGBMRegressor(**lgb_params)

# RF model
rf_model = RandomForestRegressor(**rf_params)

# ET model
et_model = ExtraTreesRegressor()

# SVR model
#SVM is too slow in more then 10000 set
#svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.05)

# DecsionTree model
dt_model = DecisionTreeRegressor()

# AdaBoost model
ada_model = AdaBoostRegressor(learning_rate=0.1, loss='square', n_estimators=1000)

stack = Ensemble(n_splits=5,
        stacker=LinearRegression(),
        base_models=(rf_model,lgb_model, ada_model,et_model,dt_model,xgb_model))
In [197]:
y_pred_stack = stack.fit_predict(x_train, y_train, x_test)
Fit Model 0 fold 0
Fit Model 0 fold 1
Fit Model 0 fold 2
Fit Model 0 fold 3
Fit Model 0 fold 4
Fit Model 1 fold 0
Fit Model 1 fold 1
Fit Model 1 fold 2
Fit Model 1 fold 3
Fit Model 1 fold 4
Fit Model 2 fold 0
Fit Model 2 fold 1
Fit Model 2 fold 2
Fit Model 2 fold 3
Fit Model 2 fold 4
Fit Model 3 fold 0
Fit Model 3 fold 1
Fit Model 3 fold 2
Fit Model 3 fold 3
Fit Model 3 fold 4
Fit Model 4 fold 0
Fit Model 4 fold 1
Fit Model 4 fold 2
Fit Model 4 fold 3
Fit Model 4 fold 4
Fit Model 5 fold 0
Fit Model 5 fold 1
Fit Model 5 fold 2
Fit Model 5 fold 3
Fit Model 5 fold 4
In [201]:
#print('Train r2 score: ', r2_score(xgb_pred_train, y_train))
print('Test r2 score: ', r2_score(y_pred_stack, y_test))
#train_mse5 = mean_squared_error(xgb_pred_train, y_train)
test_mse5 = mean_squared_error(y_pred_stack, y_test)
#train_rmse5 = np.sqrt(train_mse5)
test_rmse5 = np.sqrt(test_mse5)
#print('Train RMSE: %.4f' % train_rmse5)
print('Test RMSE: %.4f' % test_rmse5)
Test r2 score:  0.6445651369935155
Test RMSE: 0.1291


ON LEADERBOARD SCORE- 116.6

we got least RMSE

7 AdaBoost

In [234]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.datasets import make_regression
X_train, y_train = make_regression(n_features=4, n_informative=2,
                       random_state=0, shuffle=False)
regr = AdaBoostRegressor(random_state=0, n_estimators=100,learning_rate=0.001)
regr.fit(X_train, y_train)
Out[234]:
AdaBoostRegressor(base_estimator=None, learning_rate=0.001, loss='linear',
         n_estimators=100, random_state=0)
In [235]:
from sklearn.ensemble import AdaBoostRegressor
ada2=AdaBoostRegressor(n_estimators=500,learning_rate=0.001,random_state=1)
score=np.mean(cross_val_score(ada2,X_train,y_train,scoring='neg_mean_squared_error',cv=5,n_jobs=1))
score 
Out[235]:
-203.6949771587962

Dimensionality Reduction

PCA

In [206]:
from sklearn.decomposition import PCA
#scaled_matrix=X_train.as_matrix
matrix = X_train.as_matrix()
scaler = StandardScaler()
scaler.fit(matrix)
scaled_matrix = scaler.transform(matrix)
In [207]:
pca = PCA()
pca.fit(scaled_matrix)
pca_samples = pca.transform(scaled_matrix)
In [213]:
fig, ax = plt.subplots(figsize=(50,20))
sns.set(font_scale=1)
plt.step(range(matrix.shape[1]), pca.explained_variance_ratio_.cumsum(), where='mid',
         label='cumulative explained variance')
sns.barplot(np.arange(1,matrix.shape[1]+1), pca.explained_variance_ratio_, alpha=0.5, color = 'g',
            label='individual explained variance')
plt.xlim(0, 50)

ax.set_xticklabels([s if int(s.get_text())%2 == 0 else '' for s in ax.get_xticklabels()])

plt.ylabel('Explained variance', fontsize = 50)
plt.xlabel('Principal components', fontsize = 50)
plt.legend(loc='best', fontsize = 50);
In [214]:
#Principle component analysis
from sklearn.decomposition import PCA
pca = PCA(n_components=50)
pca.fit(X_train)
pca_X_train=pca.transform(X_train)
pca_X_test=pca.transform(X_test)

8 XG Boost ON PCA

In [217]:
X_train= pca_X_train
X_test= pca_X_test
xgb_model7 = XGBRegressor(subsample= 0.9, silent= 1, 
                          nthread= 4, n_estimators= 500, min_child_weight= 4,
                          max_depth= 6, learning_rate= 0.05, colsample_bytree= 1)
xgb_model7.fit(X_train, y_train, early_stopping_rounds=5, 
             eval_set=[(X_test, y_test)], verbose=False)
y_train_pred7 = xgb_model7.predict(X_train)
y_pred7 = xgb_model7.predict(X_test)

print('Train r2 score: ', r2_score(y_train_pred7, y_train))
print('Test r2 score: ', r2_score(y_test, y_pred7))
train_mse5 = mean_squared_error(y_train_pred7, y_train)
test_mse5 = mean_squared_error(y_pred7, y_test)
train_rmse5 = np.sqrt(train_mse5)
test_rmse5 = np.sqrt(test_mse5)
print('Train RMSE: %.4f' % train_rmse5)
print('Test RMSE: %.4f' % test_rmse5)
Train r2 score:  0.7267728751722196
Test r2 score:  0.6859025229693732
Train RMSE: 0.1111
Test RMSE: 0.1399


Test Data

In [237]:
print("test_demographic dataset has {} samples with {} features each.".format(*test_demo_dt.shape))
print ("test_payment dataset has {} samples with {} features each.".format(*test_payment_dt.shape))
test_demographic dataset has 2774 samples with 8 features each.
test_payment dataset has 67621 samples with 19 features each.
  • describing data

demographic data

In [238]:
test_demo_dt.head()
Out[238]:
CustomerID DOB Salary ProfessionalLicensure UtilitySpending eCommerceAccount SocialMediaAccount NoOfProperties
0 C41116 1973-04-21 1876.000 1 396.162 1 Yes 1
1 C41117 1970-03-14 1261.000 0 195.605 0 No 1
2 C41118 1981-01-03 1026.000 1 212.937 0 Yes 2
3 C41119 1983-01-04 1384.000 0 396.162 1 Yes 1
4 C41120 1979-03-16 1913.000 1 220.365 1 Yes 2
In [239]:
test_demo_dt.columns
Out[239]:
Index(['CustomerID', 'DOB', 'Salary', 'ProfessionalLicensure',
       'UtilitySpending', 'eCommerceAccount', 'SocialMediaAccount',
       'NoOfProperties'],
      dtype='object')
In [240]:
test_demo_dt.dtypes
Out[240]:
CustomerID                object
DOB                       object
Salary                   float64
ProfessionalLicensure      int64
UtilitySpending          float64
eCommerceAccount           int64
SocialMediaAccount        object
NoOfProperties             int64
dtype: object

payment data

In [241]:
test_payment_dt.head(30)
Out[241]:
CustomerID Current_Instalment_Sequence Starting_Instalment Maturity_Period Current_Outstanding Current_Loan_to_Appraisedvalu_Percent CurrentInterestrate RealEstate_Current_Inflation GDP UnemploymentRate Asset_type Urban_Development Villa_House Investment_SelfOccupied Starting_outstanding Starting_Loan_to_Appraisedvalu_Percent StartingInterestrate RealEstate_Starting_Inflation Payment_Status
0 C41116 25 25 119 61031.100 33.911 10.500 2.263 2.899 4.700 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
1 C41116 26 25 119 60882.420 34.007 10.500 2.251 2.151 4.700 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
2 C41116 27 25 119 60729.800 34.335 10.500 2.224 2.362 4.400 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
3 C41116 28 25 119 60576.140 34.673 10.875 2.197 1.229 4.600 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
4 C41116 29 25 119 60424.390 34.952 10.875 2.174 1.693 4.500 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
5 C41116 30 25 119 60268.480 35.622 10.875 2.127 2.274 4.700 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
6 C41116 31 25 119 60108.280 37.662 10.875 2.007 1.851 4.700 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
7 C41116 32 25 119 59944.710 40.496 11.000 1.861 1.104 5.000 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
8 C41116 33 25 119 59778.680 41.637 11.000 1.805 0.837 5.000 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
9 C41116 34 25 119 59603.760 43.232 10.500 1.734 -0.314 5.800 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
10 C41116 35 25 119 59415.470 46.089 10.500 1.621 -2.806 6.500 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
11 C41116 36 25 119 59222.200 49.157 10.500 1.515 -3.517 7.800 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
12 C41116 37 25 119 59023.800 48.395 9.250 1.534 -4.147 9.000 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
13 C41116 38 25 119 58820.160 46.581 9.250 1.588 -3.340 9.500 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
14 C41116 39 25 119 58611.120 46.595 9.250 1.582 -0.241 10.000 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
15 C41116 40 25 119 58396.560 47.004 9.250 1.562 1.586 9.800 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
16 C41116 41 25 119 58176.330 45.416 9.250 1.611 2.682 9.900 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
17 C41116 42 25 119 57950.270 45.257 9.250 1.610 3.029 9.400 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
18 C41116 43 25 119 57718.220 46.508 9.250 1.560 2.694 9.400 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
19 C41116 44 25 119 57480.030 47.891 9.250 1.509 1.876 9.200 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
20 C41116 45 25 119 57235.530 46.468 9.250 1.549 1.639 9.100 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
21 C41116 46 25 119 56984.560 46.044 9.250 1.556 1.176 9.000 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
22 C41116 47 25 119 56726.940 47.684 9.250 1.496 1.668 8.800 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
23 C41116 48 25 119 56462.500 48.476 9.250 1.464 2.716 8.300 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
24 C41116 49 25 119 56191.060 45.599 9.250 1.549 2.457 8.200 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
25 C41116 50 25 119 55912.430 44.231 10.500 1.589 2.366 8.200 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
26 C41116 52 25 119 55332.840 43.178 10.500 1.611 1.081 8.000 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
27 C41116 53 25 119 55031.480 39.957 10.500 1.732 0.893 7.600 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
28 C41116 54 25 119 54722.150 38.231 10.500 1.800 1.507 7.300 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
29 C41116 55 25 119 54404.620 37.988 10.500 1.801 2.422 7.200 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default
In [242]:
test_payment_dt.dtypes
Out[242]:
CustomerID                                 object
Current_Instalment_Sequence                 int64
Starting_Instalment                         int64
Maturity_Period                             int64
Current_Outstanding                       float64
Current_Loan_to_Appraisedvalu_Percent     float64
CurrentInterestrate                       float64
RealEstate_Current_Inflation              float64
GDP                                       float64
UnemploymentRate                          float64
Asset_type                                 object
Urban_Development                          object
Villa_House                                object
Investment_SelfOccupied                    object
Starting_outstanding                      float64
Starting_Loan_to_Appraisedvalu_Percent    float64
StartingInterestrate                      float64
RealEstate_Starting_Inflation             float64
Payment_Status                             object
dtype: object

Convert DOB into Age

In [243]:
test_demo_dt['DOB']=test_demo_dt['DOB'].astype('datetime64')
test_demo_dt['age'] = (pd.to_datetime('now') - test_demo_dt['DOB']).astype('<m8[Y]')
test_demo_dt['age'] = test_demo_dt['age'].astype('int')

remove DOB

In [244]:
test_demo_dt=test_demo_dt.drop(axis=1,columns='DOB')
combine two dataset on primary key that is CustomerID for doing further analysis
In [245]:
test_merge_dt=test_payment_dt.merge(test_demo_dt, left_on='CustomerID', right_on='CustomerID')
In [246]:
test_merge_dt.columns
Out[246]:
Index(['CustomerID', 'Current_Instalment_Sequence', 'Starting_Instalment',
       'Maturity_Period', 'Current_Outstanding',
       'Current_Loan_to_Appraisedvalu_Percent', 'CurrentInterestrate',
       'RealEstate_Current_Inflation', 'GDP', 'UnemploymentRate', 'Asset_type',
       'Urban_Development', 'Villa_House', 'Investment_SelfOccupied',
       'Starting_outstanding', 'Starting_Loan_to_Appraisedvalu_Percent',
       'StartingInterestrate', 'RealEstate_Starting_Inflation',
       'Payment_Status', 'Salary', 'ProfessionalLicensure', 'UtilitySpending',
       'eCommerceAccount', 'SocialMediaAccount', 'NoOfProperties', 'age'],
      dtype='object')
In [247]:
test_merge_dt.head()
Out[247]:
CustomerID Current_Instalment_Sequence Starting_Instalment Maturity_Period Current_Outstanding Current_Loan_to_Appraisedvalu_Percent CurrentInterestrate RealEstate_Current_Inflation GDP UnemploymentRate Asset_type Urban_Development Villa_House Investment_SelfOccupied Starting_outstanding Starting_Loan_to_Appraisedvalu_Percent StartingInterestrate RealEstate_Starting_Inflation Payment_Status Salary ProfessionalLicensure UtilitySpending eCommerceAccount SocialMediaAccount NoOfProperties age
0 C41116 25 25 119 61031.100 33.911 10.500 2.263 2.899 4.700 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default 1876.000 1 396.162 1 Yes 1 46
1 C41116 26 25 119 60882.420 34.007 10.500 2.251 2.151 4.700 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default 1876.000 1 396.162 1 Yes 1 46
2 C41116 27 25 119 60729.800 34.335 10.500 2.224 2.362 4.400 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default 1876.000 1 396.162 1 Yes 1 46
3 C41116 28 25 119 60576.140 34.673 10.875 2.197 1.229 4.600 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default 1876.000 1 396.162 1 Yes 1 46
4 C41116 29 25 119 60424.390 34.952 10.875 2.174 1.693 4.500 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default 1876.000 1 396.162 1 Yes 1 46
In [248]:
test_merge_dt.shape
Out[248]:
(67621, 26)
In [249]:
sample_file.shape
Out[249]:
(2774, 2)
In [250]:
test_merge_dt.head(5)
Out[250]:
CustomerID Current_Instalment_Sequence Starting_Instalment Maturity_Period Current_Outstanding Current_Loan_to_Appraisedvalu_Percent CurrentInterestrate RealEstate_Current_Inflation GDP UnemploymentRate Asset_type Urban_Development Villa_House Investment_SelfOccupied Starting_outstanding Starting_Loan_to_Appraisedvalu_Percent StartingInterestrate RealEstate_Starting_Inflation Payment_Status Salary ProfessionalLicensure UtilitySpending eCommerceAccount SocialMediaAccount NoOfProperties age
0 C41116 25 25 119 61031.100 33.911 10.500 2.263 2.899 4.700 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default 1876.000 1 396.162 1 Yes 1 46
1 C41116 26 25 119 60882.420 34.007 10.500 2.251 2.151 4.700 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default 1876.000 1 396.162 1 Yes 1 46
2 C41116 27 25 119 60729.800 34.335 10.500 2.224 2.362 4.400 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default 1876.000 1 396.162 1 Yes 1 46
3 C41116 28 25 119 60576.140 34.673 10.875 2.197 1.229 4.600 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default 1876.000 1 396.162 1 Yes 1 46
4 C41116 29 25 119 60424.390 34.952 10.875 2.174 1.693 4.500 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 Non-Payoff/Non-Default 1876.000 1 396.162 1 Yes 1 46
In [251]:
test_merge_dt.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 67621 entries, 0 to 67620
Data columns (total 26 columns):
CustomerID                                67621 non-null object
Current_Instalment_Sequence               67621 non-null int64
Starting_Instalment                       67621 non-null int64
Maturity_Period                           67621 non-null int64
Current_Outstanding                       67621 non-null float64
Current_Loan_to_Appraisedvalu_Percent     67621 non-null float64
CurrentInterestrate                       67621 non-null float64
RealEstate_Current_Inflation              67621 non-null float64
GDP                                       67621 non-null float64
UnemploymentRate                          67621 non-null float64
Asset_type                                67621 non-null object
Urban_Development                         67621 non-null object
Villa_House                               67621 non-null object
Investment_SelfOccupied                   67621 non-null object
Starting_outstanding                      67621 non-null float64
Starting_Loan_to_Appraisedvalu_Percent    67621 non-null float64
StartingInterestrate                      67621 non-null float64
RealEstate_Starting_Inflation             67621 non-null float64
Payment_Status                            67621 non-null object
Salary                                    67621 non-null float64
ProfessionalLicensure                     67621 non-null int64
UtilitySpending                           67621 non-null float64
eCommerceAccount                          67621 non-null int64
SocialMediaAccount                        67621 non-null object
NoOfProperties                            67621 non-null int64
age                                       67621 non-null int32
dtypes: float64(12), int32(1), int64(6), object(7)
memory usage: 13.7+ MB
In [252]:
test_merge_dt.describe(include='all').T.sort_values("count")
Out[252]:
count unique top freq mean std min 25% 50% 75% max
CustomerID 67621 2774 C43195 48 NaN NaN NaN NaN NaN NaN NaN
SocialMediaAccount 67621 2 Yes 46266 NaN NaN NaN NaN NaN NaN NaN
eCommerceAccount 67621.000 NaN NaN NaN 0.504 0.500 0.000 0.000 1.000 1.000 1.000
UtilitySpending 67621.000 NaN NaN NaN 342.369 89.351 185.701 267.409 339.214 420.922 495.202
ProfessionalLicensure 67621.000 NaN NaN NaN 0.575 0.494 0.000 0.000 1.000 1.000 1.000
Salary 67621.000 NaN NaN NaN 3936.764 7134.267 1002.168 1419.000 1891.477 4217.312 158198.801
Payment_Status 67621 3 Non-Payoff/Non-Default 65750 NaN NaN NaN NaN NaN NaN NaN
RealEstate_Starting_Inflation 67621.000 NaN NaN NaN 1.956 0.350 0.765 1.794 2.089 2.224 2.263
StartingInterestrate 67621.000 NaN NaN NaN 5.779 2.850 0.000 5.250 6.250 7.375 16.500
Starting_Loan_to_Appraisedvalu_Percent 67621.000 NaN NaN NaN 78.310 10.516 50.100 74.000 80.000 80.000 218.500
Starting_outstanding 67621.000 NaN NaN NaN 255618.947 216084.325 13660.500 99900.000 180000.000 369000.000 1995000.000
Investment_SelfOccupied 67621 2 Self Occupancy 56002 NaN NaN NaN NaN NaN NaN NaN
Villa_House 67621 2 Yes 41921 NaN NaN NaN NaN NaN NaN NaN
Urban_Development 67621 2 No 59720 NaN NaN NaN NaN NaN NaN NaN
Asset_type 67621 2 No shred services 63340 NaN NaN NaN NaN NaN NaN NaN
UnemploymentRate 67621.000 NaN NaN NaN 6.896 1.964 3.800 5.000 6.500 9.000 10.000
GDP 67621.000 NaN NaN NaN 1.203 2.039 -4.147 0.893 1.693 2.556 5.132
RealEstate_Current_Inflation 67621.000 NaN NaN NaN 1.791 0.265 1.078 1.562 1.734 2.007 2.263
CurrentInterestrate 67621.000 NaN NaN NaN 6.606 2.066 1.000 5.574 6.500 7.750 37.500
Current_Loan_to_Appraisedvalu_Percent 67621.000 NaN NaN NaN 82.332 25.938 0.000 65.298 82.674 101.619 262.205
Current_Outstanding 67621.000 NaN NaN NaN 241462.890 208716.542 0.000 90925.170 168754.380 345475.800 1946648.790
Maturity_Period 67621.000 NaN NaN NaN 135.274 21.407 31.000 135.000 141.000 145.000 190.000
Starting_Instalment 67621.000 NaN NaN NaN 23.752 5.291 1.000 20.000 24.000 28.000 50.000
Current_Instalment_Sequence 67621.000 NaN NaN NaN 37.776 11.088 1.000 30.000 37.000 46.000 60.000
NoOfProperties 67621.000 NaN NaN NaN 1.479 0.829 1.000 1.000 1.000 2.000 4.000
age 67621.000 NaN NaN NaN 39.045 3.985 23.000 37.000 38.000 40.000 70.000
  • Observation - ProfessionalLicensure,eCommerceAccount,NoOfProperties are categorical type so need to change its data types
  • observation -CreditRiskScore is target columns and we need to remove before to build the clusters
In [253]:
print(test_merge_dt.ProfessionalLicensure.unique())
print(test_merge_dt.eCommerceAccount.unique())
print(test_merge_dt.NoOfProperties.unique())
[1 0]
[1 0]
[1 2 3 4]
In [254]:
test_merge_dt['ProfessionalLicensure']=test_merge_dt['ProfessionalLicensure'].astype('object')
test_merge_dt['eCommerceAccount']=test_merge_dt['eCommerceAccount'].astype('object')
test_merge_dt['NoOfProperties']=test_merge_dt['NoOfProperties'].astype('object')
In [255]:
test_merge_dt.Urban_Development.unique()
Out[255]:
array(['No', 'Yes'], dtype=object)
In [256]:
test_merge_dt.describe(include=['O'])
Out[256]:
CustomerID Asset_type Urban_Development Villa_House Investment_SelfOccupied Payment_Status ProfessionalLicensure eCommerceAccount SocialMediaAccount NoOfProperties
count 67621 67621 67621 67621 67621 67621 67621 67621 67621 67621
unique 2774 2 2 2 2 3 2 2 2 4
top C43195 No shred services No Yes Self Occupancy Non-Payoff/Non-Default 1 1 Yes 1
freq 48 63340 59720 41921 56002 65750 38858 34072 46266 46415
In [257]:
test_merge_dt.describe(include=['float32','float64','int64','int32'])
Out[257]:
Current_Instalment_Sequence Starting_Instalment Maturity_Period Current_Outstanding Current_Loan_to_Appraisedvalu_Percent CurrentInterestrate RealEstate_Current_Inflation GDP UnemploymentRate Starting_outstanding Starting_Loan_to_Appraisedvalu_Percent StartingInterestrate RealEstate_Starting_Inflation Salary UtilitySpending age
count 67621.000 67621.000 67621.000 67621.000 67621.000 67621.000 67621.000 67621.000 67621.000 67621.000 67621.000 67621.000 67621.000 67621.000 67621.000 67621.000
mean 37.776 23.752 135.274 241462.890 82.332 6.606 1.791 1.203 6.896 255618.947 78.310 5.779 1.956 3936.764 342.369 39.045
std 11.088 5.291 21.407 208716.542 25.938 2.066 0.265 2.039 1.964 216084.325 10.516 2.850 0.350 7134.267 89.351 3.985
min 1.000 1.000 31.000 0.000 0.000 1.000 1.078 -4.147 3.800 13660.500 50.100 0.000 0.765 1002.168 185.701 23.000
25% 30.000 20.000 135.000 90925.170 65.298 5.574 1.562 0.893 5.000 99900.000 74.000 5.250 1.794 1419.000 267.409 37.000
50% 37.000 24.000 141.000 168754.380 82.674 6.500 1.734 1.693 6.500 180000.000 80.000 6.250 2.089 1891.477 339.214 38.000
75% 46.000 28.000 145.000 345475.800 101.619 7.750 2.007 2.556 9.000 369000.000 80.000 7.375 2.224 4217.312 420.922 40.000
max 60.000 50.000 190.000 1946648.790 262.205 37.500 2.263 5.132 10.000 1995000.000 218.500 16.500 2.263 158198.801 495.202 70.000

check missing data

In [258]:
#function to find missing values
def miss_data(x):
    total = x.isnull().sum().sort_values(ascending=False)
    percent = (x.isnull().sum()/x.isnull().count()).sort_values(ascending=False)
    missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
   
    missing_data.index.name = 'column_names'
    missing_data.reset_index(inplace=True)
    sns.set(style="whitegrid")
    sns.set(rc={'figure.figsize':(20,8.27)})
    sns.barplot(missing_data.column_names,missing_data.Percent, alpha=0.9)
    print(plt.title('missing data plot'))
    print(plt.ylabel('percentage of missing data', fontsize=12))
    print(plt.xlabel('column names', fontsize=12))
    print(plt.show())
    print(missing_data)
In [259]:
miss_data(x=test_merge_dt)
Text(0.5,1,'missing data plot')
Text(0,0.5,'percentage of missing data')
Text(0.5,0,'column names')
None
                              column_names  Total  Percent
0                                      age      0    0.000
1                           NoOfProperties      0    0.000
2              Current_Instalment_Sequence      0    0.000
3                      Starting_Instalment      0    0.000
4                          Maturity_Period      0    0.000
5                      Current_Outstanding      0    0.000
6    Current_Loan_to_Appraisedvalu_Percent      0    0.000
7                      CurrentInterestrate      0    0.000
8             RealEstate_Current_Inflation      0    0.000
9                                      GDP      0    0.000
10                        UnemploymentRate      0    0.000
11                              Asset_type      0    0.000
12                       Urban_Development      0    0.000
13                             Villa_House      0    0.000
14                 Investment_SelfOccupied      0    0.000
15                    Starting_outstanding      0    0.000
16  Starting_Loan_to_Appraisedvalu_Percent      0    0.000
17                    StartingInterestrate      0    0.000
18           RealEstate_Starting_Inflation      0    0.000
19                          Payment_Status      0    0.000
20                                  Salary      0    0.000
21                   ProfessionalLicensure      0    0.000
22                         UtilitySpending      0    0.000
23                        eCommerceAccount      0    0.000
24                      SocialMediaAccount      0    0.000
25                              CustomerID      0    0.000
  • there is not any NA's present

Feature engineering

In [260]:
test_merge_dt['Appraisal_value']=test_merge_dt['Starting_outstanding']/(test_merge_dt['Starting_Loan_to_Appraisedvalu_Percent']/100)
In [261]:
test_merge_dt['current_Appraisal_value']=test_merge_dt['Current_Outstanding']/(test_merge_dt['Current_Loan_to_Appraisedvalu_Percent']/100)
In [262]:
test_merge_dt['remaining_outstanding']=  test_merge_dt['Starting_outstanding'] - test_merge_dt['Current_Outstanding']
In [263]:
test_merge_dt.columns
Out[263]:
Index(['CustomerID', 'Current_Instalment_Sequence', 'Starting_Instalment',
       'Maturity_Period', 'Current_Outstanding',
       'Current_Loan_to_Appraisedvalu_Percent', 'CurrentInterestrate',
       'RealEstate_Current_Inflation', 'GDP', 'UnemploymentRate', 'Asset_type',
       'Urban_Development', 'Villa_House', 'Investment_SelfOccupied',
       'Starting_outstanding', 'Starting_Loan_to_Appraisedvalu_Percent',
       'StartingInterestrate', 'RealEstate_Starting_Inflation',
       'Payment_Status', 'Salary', 'ProfessionalLicensure', 'UtilitySpending',
       'eCommerceAccount', 'SocialMediaAccount', 'NoOfProperties', 'age',
       'Appraisal_value', 'current_Appraisal_value', 'remaining_outstanding'],
      dtype='object')

MAke cluster on Test Data

In [ ]:
test_matrix=test_merge_dt.as_matrix()
kmeans2.fit_predict(test_matrix)

clusters=kmeans2.labels_

test_cluster_class=clusters.tolist()


Data Needs Grouping

In [264]:
test_merge_dt_1=test_merge_dt[['CustomerID','Current_Instalment_Sequence', 'Current_Outstanding','Current_Loan_to_Appraisedvalu_Percent','CurrentInterestrate',
                    'RealEstate_Current_Inflation','GDP', 'UnemploymentRate','current_Appraisal_value','remaining_outstanding']]
In [265]:
test_merge_dt_1.shape
Out[265]:
(67621, 10)


Data Don't Need Grouping

In [269]:
test_merge_dt_2=test_merge_dt[['CustomerID', 'Starting_Instalment','Maturity_Period','Asset_type',
       'Urban_Development', 'Villa_House', 'Investment_SelfOccupied',
       'Starting_outstanding', 'Starting_Loan_to_Appraisedvalu_Percent',
       'StartingInterestrate', 'RealEstate_Starting_Inflation',
        'age', 'Salary', 'ProfessionalLicensure',
       'UtilitySpending', 'eCommerceAccount', 'SocialMediaAccount','Appraisal_value',
       'NoOfProperties']]
In [270]:
test_merge_dt_2=test_merge_dt_2.drop_duplicates()# drop duplicates from dataset
In [271]:
test_merge_dt_2.shape
Out[271]:
(2774, 19)


Function To Group The Data

In [272]:
def feat_eng(data):
    df = pd.DataFrame()
    for col in data.columns:
        if col in ['CustomerID']:
            continue
        df[col + '_mean'] = data.groupby(['CustomerID'])[col].mean()
        df[col + '_median'] = data.groupby(['CustomerID'])[col].median()
        df[col + '_max'] = data.groupby(['CustomerID'])[col].max()
        df[col + '_min'] = data.groupby(['CustomerID'])[col].min()
        df[col + '_std'] = data.groupby(['CustomerID'])[col].std()
        #df[col + '_kurtosis'] = data.groupby(['CustomerID'])[col].kurtosis()
        df[col + '_skew'] = data.groupby(['CustomerID'])[col].skew() 
        df[col + '_range'] = df[col + '_max'] - df[col + '_min']
        df[col + '_kurtosis'] = data.groupby(['CustomerID'])[col].apply(lambda x: x.kurtosis())
        df[col + '_maxtoMin'] = df[col + '_max'] / df[col + '_min']
        #df[col + '_coeffvar'] = data.groupby(['CustomerID'])[col].apply(lambda x: np.std(x) / np.mean(x))
        #in statistics, the median absolute deviation (MAD) is a robust measure of the variablility of a univariate sample of quantitative data.
        df[col + '_meanAD'] = data.groupby(['CustomerID'])[col].apply(lambda x: np.mean(np.abs(np.diff(x))))
        df[col + '_mad'] = data.groupby(['CustomerID'])[col].apply(lambda x: np.median(np.abs(np.diff(x))))
        df[col + '_abs_max'] = data.groupby(['CustomerID'])[col].apply(lambda x: np.max(np.abs(x)))
        df[col + '_abs_min'] = data.groupby(['CustomerID'])[col].apply(lambda x: np.min(np.abs(x)))
        df[col + '_abs_avg'] = (df[col + '_abs_min'] + df[col + '_abs_max'])/2
    return df 
In [273]:
test_final_dt_1=feat_eng(data=test_merge_dt_1)
In [274]:
test_final_dt_1.head(10)
Out[274]:
Current_Instalment_Sequence_mean Current_Instalment_Sequence_median Current_Instalment_Sequence_max Current_Instalment_Sequence_min Current_Instalment_Sequence_std Current_Instalment_Sequence_skew Current_Instalment_Sequence_range Current_Instalment_Sequence_kurtosis Current_Instalment_Sequence_maxtoMin Current_Instalment_Sequence_meanAD Current_Instalment_Sequence_mad Current_Instalment_Sequence_abs_max Current_Instalment_Sequence_abs_min Current_Instalment_Sequence_abs_avg Current_Outstanding_mean Current_Outstanding_median Current_Outstanding_max Current_Outstanding_min Current_Outstanding_std Current_Outstanding_skew Current_Outstanding_range Current_Outstanding_kurtosis Current_Outstanding_maxtoMin Current_Outstanding_meanAD Current_Outstanding_mad Current_Outstanding_abs_max Current_Outstanding_abs_min Current_Outstanding_abs_avg Current_Loan_to_Appraisedvalu_Percent_mean Current_Loan_to_Appraisedvalu_Percent_median Current_Loan_to_Appraisedvalu_Percent_max Current_Loan_to_Appraisedvalu_Percent_min Current_Loan_to_Appraisedvalu_Percent_std Current_Loan_to_Appraisedvalu_Percent_skew Current_Loan_to_Appraisedvalu_Percent_range Current_Loan_to_Appraisedvalu_Percent_kurtosis Current_Loan_to_Appraisedvalu_Percent_maxtoMin Current_Loan_to_Appraisedvalu_Percent_meanAD Current_Loan_to_Appraisedvalu_Percent_mad Current_Loan_to_Appraisedvalu_Percent_abs_max Current_Loan_to_Appraisedvalu_Percent_abs_min Current_Loan_to_Appraisedvalu_Percent_abs_avg CurrentInterestrate_mean CurrentInterestrate_median CurrentInterestrate_max CurrentInterestrate_min CurrentInterestrate_std CurrentInterestrate_skew CurrentInterestrate_range CurrentInterestrate_kurtosis CurrentInterestrate_maxtoMin CurrentInterestrate_meanAD CurrentInterestrate_mad CurrentInterestrate_abs_max CurrentInterestrate_abs_min CurrentInterestrate_abs_avg RealEstate_Current_Inflation_mean RealEstate_Current_Inflation_median RealEstate_Current_Inflation_max RealEstate_Current_Inflation_min RealEstate_Current_Inflation_std RealEstate_Current_Inflation_skew RealEstate_Current_Inflation_range RealEstate_Current_Inflation_kurtosis RealEstate_Current_Inflation_maxtoMin RealEstate_Current_Inflation_meanAD RealEstate_Current_Inflation_mad RealEstate_Current_Inflation_abs_max RealEstate_Current_Inflation_abs_min RealEstate_Current_Inflation_abs_avg GDP_mean GDP_median GDP_max GDP_min GDP_std GDP_skew GDP_range GDP_kurtosis GDP_maxtoMin GDP_meanAD GDP_mad GDP_abs_max GDP_abs_min GDP_abs_avg UnemploymentRate_mean UnemploymentRate_median UnemploymentRate_max UnemploymentRate_min UnemploymentRate_std UnemploymentRate_skew UnemploymentRate_range UnemploymentRate_kurtosis UnemploymentRate_maxtoMin UnemploymentRate_meanAD UnemploymentRate_mad UnemploymentRate_abs_max UnemploymentRate_abs_min UnemploymentRate_abs_avg current_Appraisal_value_mean current_Appraisal_value_median current_Appraisal_value_max current_Appraisal_value_min current_Appraisal_value_std current_Appraisal_value_skew current_Appraisal_value_range current_Appraisal_value_kurtosis current_Appraisal_value_maxtoMin current_Appraisal_value_meanAD current_Appraisal_value_mad current_Appraisal_value_abs_max current_Appraisal_value_abs_min current_Appraisal_value_abs_avg remaining_outstanding_mean remaining_outstanding_median remaining_outstanding_max remaining_outstanding_min remaining_outstanding_std remaining_outstanding_skew remaining_outstanding_range remaining_outstanding_kurtosis remaining_outstanding_maxtoMin remaining_outstanding_meanAD remaining_outstanding_mad remaining_outstanding_abs_max remaining_outstanding_abs_min remaining_outstanding_abs_avg
CustomerID
C41116 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 57546.403 57950.270 61031.100 52686.350 2518.907 -0.410 8344.750 -1.032 1.158 245.434 229.055 61031.100 52686.350 56858.725 41.627 43.178 49.157 33.911 5.301 -0.159 15.246 -1.645 1.450 1.191 1.097 49.157 33.911 41.534 10.107 10.500 11.000 9.250 0.685 -0.386 1.750 -1.685 1.189 0.103 0.000 11.000 9.250 10.125 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 140488.186 137726.587 179974.299 116475.479 19594.911 0.783 63498.820 -0.589 1.545 3703.646 3165.397 179974.299 116475.479 148224.889 6203.597 5799.730 11063.650 2718.900 2518.907 0.410 8344.750 -1.032 4.069 245.434 229.055 11063.650 2718.900 6891.275
C41117 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 25540.784 25924.430 29641.160 20092.000 2887.515 -0.350 9549.160 -1.082 1.475 280.858 274.935 29641.160 20092.000 24866.580 17.869 18.569 22.174 12.904 2.635 -0.319 9.270 -1.112 1.718 0.558 0.443 22.174 12.904 17.539 8.990 8.990 8.990 8.990 0.000 0.000 0.000 0.000 1.000 0.000 0.000 8.990 8.990 8.990 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 144896.970 142048.707 185622.232 120130.699 20209.836 0.783 65491.533 -0.589 1.545 3819.873 3264.733 185622.232 120130.699 152876.465 9559.216 9175.570 15008.000 5458.840 2887.515 0.350 9549.160 -1.082 2.749 280.858 274.935 15008.000 5458.840 10233.420
C41118 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 88553.436 88748.400 92310.090 83836.020 2439.412 -0.220 8474.070 -1.099 1.101 301.991 277.810 92310.090 83836.020 88073.055 91.750 94.125 107.229 73.448 11.394 -0.258 33.781 -1.465 1.460 2.551 2.251 107.229 73.448 90.339 8.211 8.200 10.875 6.700 1.128 0.641 4.175 0.600 1.623 0.201 0.000 10.875 6.700 8.787 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 98106.412 96177.918 125680.552 81337.738 13683.616 0.783 44342.813 -0.589 1.545 2586.348 2210.476 125680.552 81337.738 103509.145 4246.564 4051.600 8963.980 489.910 2439.412 0.220 8474.070 -1.099 18.297 301.991 277.810 8963.980 489.910 4726.945
C41119 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 160142.145 160372.280 164438.800 155180.900 2745.590 -0.212 9257.900 -1.090 1.060 272.291 256.810 164438.800 155180.900 159809.850 102.933 104.481 121.111 81.130 12.939 -0.318 39.981 -1.335 1.493 2.855 2.627 121.111 81.130 101.120 7.329 7.500 7.500 6.000 0.484 -2.535 1.500 4.689 1.250 0.044 0.000 7.500 6.000 6.750 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 158217.589 155107.480 202686.791 131174.513 22067.760 0.783 71512.278 -0.589 1.545 4171.040 3564.866 202686.791 131174.513 166930.652 4857.855 4627.720 9819.100 561.200 2745.590 0.212 9257.900 -1.090 17.497 272.291 256.810 9819.100 561.200 5190.150
C41120 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 63062.625 63384.120 66606.840 58520.110 2496.772 -0.312 8086.730 -1.202 1.138 237.845 201.495 66606.840 58520.110 62563.475 92.498 95.859 109.070 75.042 11.657 -0.172 34.028 -1.611 1.453 2.619 2.317 109.070 75.042 92.056 7.290 6.975 9.975 6.975 0.676 2.843 3.000 8.162 1.430 0.176 0.000 9.975 6.975 8.475 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 69285.776 67923.814 88759.484 57443.221 9663.792 0.783 31316.263 -0.589 1.545 1826.559 1561.106 88759.484 57443.221 73101.353 3937.375 3615.880 8479.890 393.160 2496.772 0.312 8086.730 -1.202 21.569 237.845 201.495 8479.890 393.160 4436.525
C41121 37.000 37.000 48 26 6.782 0.000 22 -1.200 1.846 1.000 1.000 48 26 37.000 141858.406 142113.190 146118.240 137711.720 2749.158 0.098 8406.520 -1.498 1.061 422.474 399.300 146118.240 137711.720 141914.980 63.032 67.032 71.592 49.392 7.872 -0.765 22.199 -1.034 1.449 1.805 1.772 71.592 49.392 60.492 7.560 7.560 7.560 7.560 0.000 0.000 0.000 0.000 1.000 0.000 0.000 7.560 7.560 7.560 1.745 1.610 2.251 1.464 0.273 0.906 0.786 -0.836 1.537 0.051 0.047 2.251 1.464 1.858 0.791 1.639 3.029 -4.147 2.167 -1.336 7.176 0.563 -0.731 0.881 0.670 4.147 0.241 2.194 7.352 8.300 10.000 4.400 2.209 -0.253 5.600 -1.850 2.273 0.364 0.200 10.000 4.400 7.200 229331.592 211589.607 295831.183 192467.689 35927.342 0.906 103363.494 -0.836 1.537 6710.294 6235.979 295831.183 192467.689 244149.436 5141.594 4886.810 9288.280 881.760 2749.158 -0.098 8406.520 -1.498 10.534 422.474 399.300 9288.280 881.760 5085.020
C41122 33.500 33.500 41 26 4.761 0.000 15 -1.200 1.577 1.000 1.000 41 26 33.500 201369.084 201821.305 202393.890 199544.240 1002.732 -0.724 2849.650 -0.984 1.014 392.361 316.350 202393.890 199544.240 200969.065 83.716 84.479 99.843 67.208 12.440 -0.137 32.634 -1.770 1.486 2.779 2.402 99.843 67.208 83.526 6.080 6.080 6.080 6.080 0.000 0.000 0.000 0.000 1.000 0.000 0.000 6.080 6.080 6.080 1.837 1.769 2.251 1.515 0.281 0.364 0.736 -1.679 1.486 0.059 0.048 2.251 1.515 1.883 0.213 1.167 2.682 -4.147 2.350 -0.905 6.829 -0.751 -0.647 1.043 0.747 4.147 0.241 2.194 6.619 5.400 10.000 4.400 2.282 0.553 5.600 -1.624 2.273 0.427 0.300 10.000 4.400 7.200 245744.799 236707.801 301144.070 202653.504 37618.789 0.364 98490.566 -1.679 1.486 7877.105 6488.444 301144.070 202653.504 251898.787 1630.916 1178.695 3455.760 606.110 1002.732 0.724 2849.650 -0.984 5.702 392.361 316.350 3455.760 606.110 2030.935
C41123 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 28823.531 29117.050 30795.220 25958.760 1468.562 -0.459 4836.460 -1.019 1.186 142.249 121.995 30795.220 25958.760 28376.990 25.501 26.273 30.268 20.931 3.293 -0.139 9.338 -1.675 1.446 0.740 0.691 30.268 20.931 25.600 12.000 12.000 12.000 12.000 0.000 0.000 0.000 0.000 1.000 0.000 0.000 12.000 12.000 12.000 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 114849.176 112591.567 147129.097 95218.773 16018.851 0.783 51910.323 -0.589 1.545 3027.732 2587.714 147129.097 95218.773 121173.935 3576.469 3282.950 6441.240 1604.780 1468.562 0.459 4836.460 -1.019 4.014 142.249 121.995 6441.240 1604.780 4023.010
C41124 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 21420.087 21545.810 22251.570 20186.550 618.318 -0.499 2065.020 -0.957 1.102 60.736 53.845 22251.570 20186.550 21219.060 33.831 34.701 39.645 26.980 4.306 -0.234 12.666 -1.512 1.469 0.957 0.846 39.645 26.980 33.312 14.040 14.040 14.040 14.040 0.000 0.000 0.000 0.000 1.000 0.000 0.000 14.040 14.040 14.040 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 64380.580 63115.041 82475.617 53376.438 8979.629 0.783 29099.179 -0.589 1.545 1697.245 1450.585 82475.617 53376.438 67926.027 1329.913 1204.190 2563.450 498.430 618.318 0.499 2065.020 -0.957 5.143 60.736 53.845 2563.450 498.430 1530.940
C41125 37.500 37.500 52 23 8.803 0.000 29 -1.200 2.261 1.000 1.000 52 23 37.500 136678.711 137089.880 139200.000 132868.380 1875.052 -0.725 6331.620 -0.316 1.048 218.332 200.000 139200.000 132868.380 136034.190 90.252 96.498 104.853 70.426 12.631 -0.620 34.426 -1.362 1.489 2.408 1.852 104.853 70.426 87.640 8.984 8.625 10.750 8.625 0.737 1.824 2.125 1.712 1.246 0.147 0.000 10.750 8.625 9.688 1.773 1.610 2.263 1.464 0.291 0.764 0.798 -1.216 1.545 0.046 0.040 2.263 1.464 1.864 1.146 1.681 3.121 -4.147 2.033 -1.587 7.268 1.612 -0.753 0.763 0.581 4.147 0.241 2.194 7.190 7.900 10.000 4.400 2.102 -0.163 5.600 -1.765 2.273 0.310 0.200 10.000 4.400 7.200 154864.455 140651.674 197653.029 127916.771 25448.438 0.764 69736.258 -1.216 1.545 4013.654 3458.862 197653.029 127916.771 162784.900 2521.289 2110.120 6331.620 0.000 1875.052 0.725 6331.620 -0.316 inf 218.332 200.000 6331.620 0.000 3165.810
In [275]:
test_final_dt_1['CustomerID']=test_final_dt_1.index
In [276]:
test_final_dt_1.head(10)
Out[276]:
Current_Instalment_Sequence_mean Current_Instalment_Sequence_median Current_Instalment_Sequence_max Current_Instalment_Sequence_min Current_Instalment_Sequence_std Current_Instalment_Sequence_skew Current_Instalment_Sequence_range Current_Instalment_Sequence_kurtosis Current_Instalment_Sequence_maxtoMin Current_Instalment_Sequence_meanAD Current_Instalment_Sequence_mad Current_Instalment_Sequence_abs_max Current_Instalment_Sequence_abs_min Current_Instalment_Sequence_abs_avg Current_Outstanding_mean Current_Outstanding_median Current_Outstanding_max Current_Outstanding_min Current_Outstanding_std Current_Outstanding_skew Current_Outstanding_range Current_Outstanding_kurtosis Current_Outstanding_maxtoMin Current_Outstanding_meanAD Current_Outstanding_mad Current_Outstanding_abs_max Current_Outstanding_abs_min Current_Outstanding_abs_avg Current_Loan_to_Appraisedvalu_Percent_mean Current_Loan_to_Appraisedvalu_Percent_median Current_Loan_to_Appraisedvalu_Percent_max Current_Loan_to_Appraisedvalu_Percent_min Current_Loan_to_Appraisedvalu_Percent_std Current_Loan_to_Appraisedvalu_Percent_skew Current_Loan_to_Appraisedvalu_Percent_range Current_Loan_to_Appraisedvalu_Percent_kurtosis Current_Loan_to_Appraisedvalu_Percent_maxtoMin Current_Loan_to_Appraisedvalu_Percent_meanAD Current_Loan_to_Appraisedvalu_Percent_mad Current_Loan_to_Appraisedvalu_Percent_abs_max Current_Loan_to_Appraisedvalu_Percent_abs_min Current_Loan_to_Appraisedvalu_Percent_abs_avg CurrentInterestrate_mean CurrentInterestrate_median CurrentInterestrate_max CurrentInterestrate_min CurrentInterestrate_std CurrentInterestrate_skew CurrentInterestrate_range CurrentInterestrate_kurtosis CurrentInterestrate_maxtoMin CurrentInterestrate_meanAD CurrentInterestrate_mad CurrentInterestrate_abs_max CurrentInterestrate_abs_min CurrentInterestrate_abs_avg RealEstate_Current_Inflation_mean RealEstate_Current_Inflation_median RealEstate_Current_Inflation_max RealEstate_Current_Inflation_min RealEstate_Current_Inflation_std RealEstate_Current_Inflation_skew RealEstate_Current_Inflation_range RealEstate_Current_Inflation_kurtosis RealEstate_Current_Inflation_maxtoMin RealEstate_Current_Inflation_meanAD RealEstate_Current_Inflation_mad RealEstate_Current_Inflation_abs_max RealEstate_Current_Inflation_abs_min RealEstate_Current_Inflation_abs_avg GDP_mean GDP_median GDP_max GDP_min GDP_std GDP_skew GDP_range GDP_kurtosis GDP_maxtoMin GDP_meanAD GDP_mad GDP_abs_max GDP_abs_min GDP_abs_avg UnemploymentRate_mean UnemploymentRate_median UnemploymentRate_max UnemploymentRate_min UnemploymentRate_std UnemploymentRate_skew UnemploymentRate_range UnemploymentRate_kurtosis UnemploymentRate_maxtoMin UnemploymentRate_meanAD UnemploymentRate_mad UnemploymentRate_abs_max UnemploymentRate_abs_min UnemploymentRate_abs_avg current_Appraisal_value_mean current_Appraisal_value_median current_Appraisal_value_max current_Appraisal_value_min current_Appraisal_value_std current_Appraisal_value_skew current_Appraisal_value_range current_Appraisal_value_kurtosis current_Appraisal_value_maxtoMin current_Appraisal_value_meanAD current_Appraisal_value_mad current_Appraisal_value_abs_max current_Appraisal_value_abs_min current_Appraisal_value_abs_avg remaining_outstanding_mean remaining_outstanding_median remaining_outstanding_max remaining_outstanding_min remaining_outstanding_std remaining_outstanding_skew remaining_outstanding_range remaining_outstanding_kurtosis remaining_outstanding_maxtoMin remaining_outstanding_meanAD remaining_outstanding_mad remaining_outstanding_abs_max remaining_outstanding_abs_min remaining_outstanding_abs_avg CustomerID
CustomerID
C41116 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 57546.403 57950.270 61031.100 52686.350 2518.907 -0.410 8344.750 -1.032 1.158 245.434 229.055 61031.100 52686.350 56858.725 41.627 43.178 49.157 33.911 5.301 -0.159 15.246 -1.645 1.450 1.191 1.097 49.157 33.911 41.534 10.107 10.500 11.000 9.250 0.685 -0.386 1.750 -1.685 1.189 0.103 0.000 11.000 9.250 10.125 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 140488.186 137726.587 179974.299 116475.479 19594.911 0.783 63498.820 -0.589 1.545 3703.646 3165.397 179974.299 116475.479 148224.889 6203.597 5799.730 11063.650 2718.900 2518.907 0.410 8344.750 -1.032 4.069 245.434 229.055 11063.650 2718.900 6891.275 C41116
C41117 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 25540.784 25924.430 29641.160 20092.000 2887.515 -0.350 9549.160 -1.082 1.475 280.858 274.935 29641.160 20092.000 24866.580 17.869 18.569 22.174 12.904 2.635 -0.319 9.270 -1.112 1.718 0.558 0.443 22.174 12.904 17.539 8.990 8.990 8.990 8.990 0.000 0.000 0.000 0.000 1.000 0.000 0.000 8.990 8.990 8.990 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 144896.970 142048.707 185622.232 120130.699 20209.836 0.783 65491.533 -0.589 1.545 3819.873 3264.733 185622.232 120130.699 152876.465 9559.216 9175.570 15008.000 5458.840 2887.515 0.350 9549.160 -1.082 2.749 280.858 274.935 15008.000 5458.840 10233.420 C41117
C41118 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 88553.436 88748.400 92310.090 83836.020 2439.412 -0.220 8474.070 -1.099 1.101 301.991 277.810 92310.090 83836.020 88073.055 91.750 94.125 107.229 73.448 11.394 -0.258 33.781 -1.465 1.460 2.551 2.251 107.229 73.448 90.339 8.211 8.200 10.875 6.700 1.128 0.641 4.175 0.600 1.623 0.201 0.000 10.875 6.700 8.787 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 98106.412 96177.918 125680.552 81337.738 13683.616 0.783 44342.813 -0.589 1.545 2586.348 2210.476 125680.552 81337.738 103509.145 4246.564 4051.600 8963.980 489.910 2439.412 0.220 8474.070 -1.099 18.297 301.991 277.810 8963.980 489.910 4726.945 C41118
C41119 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 160142.145 160372.280 164438.800 155180.900 2745.590 -0.212 9257.900 -1.090 1.060 272.291 256.810 164438.800 155180.900 159809.850 102.933 104.481 121.111 81.130 12.939 -0.318 39.981 -1.335 1.493 2.855 2.627 121.111 81.130 101.120 7.329 7.500 7.500 6.000 0.484 -2.535 1.500 4.689 1.250 0.044 0.000 7.500 6.000 6.750 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 158217.589 155107.480 202686.791 131174.513 22067.760 0.783 71512.278 -0.589 1.545 4171.040 3564.866 202686.791 131174.513 166930.652 4857.855 4627.720 9819.100 561.200 2745.590 0.212 9257.900 -1.090 17.497 272.291 256.810 9819.100 561.200 5190.150 C41119
C41120 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 63062.625 63384.120 66606.840 58520.110 2496.772 -0.312 8086.730 -1.202 1.138 237.845 201.495 66606.840 58520.110 62563.475 92.498 95.859 109.070 75.042 11.657 -0.172 34.028 -1.611 1.453 2.619 2.317 109.070 75.042 92.056 7.290 6.975 9.975 6.975 0.676 2.843 3.000 8.162 1.430 0.176 0.000 9.975 6.975 8.475 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 69285.776 67923.814 88759.484 57443.221 9663.792 0.783 31316.263 -0.589 1.545 1826.559 1561.106 88759.484 57443.221 73101.353 3937.375 3615.880 8479.890 393.160 2496.772 0.312 8086.730 -1.202 21.569 237.845 201.495 8479.890 393.160 4436.525 C41120
C41121 37.000 37.000 48 26 6.782 0.000 22 -1.200 1.846 1.000 1.000 48 26 37.000 141858.406 142113.190 146118.240 137711.720 2749.158 0.098 8406.520 -1.498 1.061 422.474 399.300 146118.240 137711.720 141914.980 63.032 67.032 71.592 49.392 7.872 -0.765 22.199 -1.034 1.449 1.805 1.772 71.592 49.392 60.492 7.560 7.560 7.560 7.560 0.000 0.000 0.000 0.000 1.000 0.000 0.000 7.560 7.560 7.560 1.745 1.610 2.251 1.464 0.273 0.906 0.786 -0.836 1.537 0.051 0.047 2.251 1.464 1.858 0.791 1.639 3.029 -4.147 2.167 -1.336 7.176 0.563 -0.731 0.881 0.670 4.147 0.241 2.194 7.352 8.300 10.000 4.400 2.209 -0.253 5.600 -1.850 2.273 0.364 0.200 10.000 4.400 7.200 229331.592 211589.607 295831.183 192467.689 35927.342 0.906 103363.494 -0.836 1.537 6710.294 6235.979 295831.183 192467.689 244149.436 5141.594 4886.810 9288.280 881.760 2749.158 -0.098 8406.520 -1.498 10.534 422.474 399.300 9288.280 881.760 5085.020 C41121
C41122 33.500 33.500 41 26 4.761 0.000 15 -1.200 1.577 1.000 1.000 41 26 33.500 201369.084 201821.305 202393.890 199544.240 1002.732 -0.724 2849.650 -0.984 1.014 392.361 316.350 202393.890 199544.240 200969.065 83.716 84.479 99.843 67.208 12.440 -0.137 32.634 -1.770 1.486 2.779 2.402 99.843 67.208 83.526 6.080 6.080 6.080 6.080 0.000 0.000 0.000 0.000 1.000 0.000 0.000 6.080 6.080 6.080 1.837 1.769 2.251 1.515 0.281 0.364 0.736 -1.679 1.486 0.059 0.048 2.251 1.515 1.883 0.213 1.167 2.682 -4.147 2.350 -0.905 6.829 -0.751 -0.647 1.043 0.747 4.147 0.241 2.194 6.619 5.400 10.000 4.400 2.282 0.553 5.600 -1.624 2.273 0.427 0.300 10.000 4.400 7.200 245744.799 236707.801 301144.070 202653.504 37618.789 0.364 98490.566 -1.679 1.486 7877.105 6488.444 301144.070 202653.504 251898.787 1630.916 1178.695 3455.760 606.110 1002.732 0.724 2849.650 -0.984 5.702 392.361 316.350 3455.760 606.110 2030.935 C41122
C41123 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 28823.531 29117.050 30795.220 25958.760 1468.562 -0.459 4836.460 -1.019 1.186 142.249 121.995 30795.220 25958.760 28376.990 25.501 26.273 30.268 20.931 3.293 -0.139 9.338 -1.675 1.446 0.740 0.691 30.268 20.931 25.600 12.000 12.000 12.000 12.000 0.000 0.000 0.000 0.000 1.000 0.000 0.000 12.000 12.000 12.000 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 114849.176 112591.567 147129.097 95218.773 16018.851 0.783 51910.323 -0.589 1.545 3027.732 2587.714 147129.097 95218.773 121173.935 3576.469 3282.950 6441.240 1604.780 1468.562 0.459 4836.460 -1.019 4.014 142.249 121.995 6441.240 1604.780 4023.010 C41123
C41124 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 21420.087 21545.810 22251.570 20186.550 618.318 -0.499 2065.020 -0.957 1.102 60.736 53.845 22251.570 20186.550 21219.060 33.831 34.701 39.645 26.980 4.306 -0.234 12.666 -1.512 1.469 0.957 0.846 39.645 26.980 33.312 14.040 14.040 14.040 14.040 0.000 0.000 0.000 0.000 1.000 0.000 0.000 14.040 14.040 14.040 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 64380.580 63115.041 82475.617 53376.438 8979.629 0.783 29099.179 -0.589 1.545 1697.245 1450.585 82475.617 53376.438 67926.027 1329.913 1204.190 2563.450 498.430 618.318 0.499 2065.020 -0.957 5.143 60.736 53.845 2563.450 498.430 1530.940 C41124
C41125 37.500 37.500 52 23 8.803 0.000 29 -1.200 2.261 1.000 1.000 52 23 37.500 136678.711 137089.880 139200.000 132868.380 1875.052 -0.725 6331.620 -0.316 1.048 218.332 200.000 139200.000 132868.380 136034.190 90.252 96.498 104.853 70.426 12.631 -0.620 34.426 -1.362 1.489 2.408 1.852 104.853 70.426 87.640 8.984 8.625 10.750 8.625 0.737 1.824 2.125 1.712 1.246 0.147 0.000 10.750 8.625 9.688 1.773 1.610 2.263 1.464 0.291 0.764 0.798 -1.216 1.545 0.046 0.040 2.263 1.464 1.864 1.146 1.681 3.121 -4.147 2.033 -1.587 7.268 1.612 -0.753 0.763 0.581 4.147 0.241 2.194 7.190 7.900 10.000 4.400 2.102 -0.163 5.600 -1.765 2.273 0.310 0.200 10.000 4.400 7.200 154864.455 140651.674 197653.029 127916.771 25448.438 0.764 69736.258 -1.216 1.545 4013.654 3458.862 197653.029 127916.771 162784.900 2521.289 2110.120 6331.620 0.000 1875.052 0.725 6331.620 -0.316 inf 218.332 200.000 6331.620 0.000 3165.810 C41125
In [277]:
test_final_dt_1.shape
Out[277]:
(2774, 127)
In [278]:
test_final_dt_1.describe(include='all')
Out[278]:
Current_Instalment_Sequence_mean Current_Instalment_Sequence_median Current_Instalment_Sequence_max Current_Instalment_Sequence_min Current_Instalment_Sequence_std Current_Instalment_Sequence_skew Current_Instalment_Sequence_range Current_Instalment_Sequence_kurtosis Current_Instalment_Sequence_maxtoMin Current_Instalment_Sequence_meanAD Current_Instalment_Sequence_mad Current_Instalment_Sequence_abs_max Current_Instalment_Sequence_abs_min Current_Instalment_Sequence_abs_avg Current_Outstanding_mean Current_Outstanding_median Current_Outstanding_max Current_Outstanding_min Current_Outstanding_std Current_Outstanding_skew Current_Outstanding_range Current_Outstanding_kurtosis Current_Outstanding_maxtoMin Current_Outstanding_meanAD Current_Outstanding_mad Current_Outstanding_abs_max Current_Outstanding_abs_min Current_Outstanding_abs_avg Current_Loan_to_Appraisedvalu_Percent_mean Current_Loan_to_Appraisedvalu_Percent_median Current_Loan_to_Appraisedvalu_Percent_max Current_Loan_to_Appraisedvalu_Percent_min Current_Loan_to_Appraisedvalu_Percent_std Current_Loan_to_Appraisedvalu_Percent_skew Current_Loan_to_Appraisedvalu_Percent_range Current_Loan_to_Appraisedvalu_Percent_kurtosis Current_Loan_to_Appraisedvalu_Percent_maxtoMin Current_Loan_to_Appraisedvalu_Percent_meanAD Current_Loan_to_Appraisedvalu_Percent_mad Current_Loan_to_Appraisedvalu_Percent_abs_max Current_Loan_to_Appraisedvalu_Percent_abs_min Current_Loan_to_Appraisedvalu_Percent_abs_avg CurrentInterestrate_mean CurrentInterestrate_median CurrentInterestrate_max CurrentInterestrate_min CurrentInterestrate_std CurrentInterestrate_skew CurrentInterestrate_range CurrentInterestrate_kurtosis CurrentInterestrate_maxtoMin CurrentInterestrate_meanAD CurrentInterestrate_mad CurrentInterestrate_abs_max CurrentInterestrate_abs_min CurrentInterestrate_abs_avg RealEstate_Current_Inflation_mean RealEstate_Current_Inflation_median RealEstate_Current_Inflation_max RealEstate_Current_Inflation_min RealEstate_Current_Inflation_std RealEstate_Current_Inflation_skew RealEstate_Current_Inflation_range RealEstate_Current_Inflation_kurtosis RealEstate_Current_Inflation_maxtoMin RealEstate_Current_Inflation_meanAD RealEstate_Current_Inflation_mad RealEstate_Current_Inflation_abs_max RealEstate_Current_Inflation_abs_min RealEstate_Current_Inflation_abs_avg GDP_mean GDP_median GDP_max GDP_min GDP_std GDP_skew GDP_range GDP_kurtosis GDP_maxtoMin GDP_meanAD GDP_mad GDP_abs_max GDP_abs_min GDP_abs_avg UnemploymentRate_mean UnemploymentRate_median UnemploymentRate_max UnemploymentRate_min UnemploymentRate_std UnemploymentRate_skew UnemploymentRate_range UnemploymentRate_kurtosis UnemploymentRate_maxtoMin UnemploymentRate_meanAD UnemploymentRate_mad UnemploymentRate_abs_max UnemploymentRate_abs_min UnemploymentRate_abs_avg current_Appraisal_value_mean current_Appraisal_value_median current_Appraisal_value_max current_Appraisal_value_min current_Appraisal_value_std current_Appraisal_value_skew current_Appraisal_value_range current_Appraisal_value_kurtosis current_Appraisal_value_maxtoMin current_Appraisal_value_meanAD current_Appraisal_value_mad current_Appraisal_value_abs_max current_Appraisal_value_abs_min current_Appraisal_value_abs_avg remaining_outstanding_mean remaining_outstanding_median remaining_outstanding_max remaining_outstanding_min remaining_outstanding_std remaining_outstanding_skew remaining_outstanding_range remaining_outstanding_kurtosis remaining_outstanding_maxtoMin remaining_outstanding_meanAD remaining_outstanding_mad remaining_outstanding_abs_max remaining_outstanding_abs_min remaining_outstanding_abs_avg CustomerID
count 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2745.000 2745.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774.000 2774
unique nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 2774
top nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan C41771
freq nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 1
mean 35.583 35.592 47.271 23.867 7.188 -0.002 23.405 -1.199 2.200 1.002 1.000 47.271 23.867 35.569 247953.391 249305.534 260438.736 229633.614 9677.249 -0.284 30805.122 -0.351 inf 1450.093 979.598 260438.736 229633.614 245036.175 82.834 84.410 97.981 66.045 10.464 -0.105 31.935 -0.711 inf 2.695 2.297 97.981 66.045 82.013 6.824 6.806 7.578 6.195 0.548 0.143 1.383 0.167 1.393 0.089 0.017 7.578 6.195 6.887 1.816 1.770 2.206 1.519 0.228 0.384 0.687 -0.598 1.461 0.053 0.045 2.206 1.519 1.862 1.156 1.722 3.203 -3.320 1.920 -1.239 6.523 0.921 0.007 0.760 0.581 4.050 0.375 2.213 6.654 6.613 9.159 4.474 1.641 0.221 4.685 -0.799 2.049 0.324 0.227 9.159 4.474 6.817 316687.661 308691.088 384584.089 264776.563 39890.022 0.383 119807.526 -0.599 1.461 9257.113 7989.704 384584.089 264776.563 324680.326 11553.058 10200.915 29872.835 -932.287 9677.249 0.284 30805.122 -0.351 inf 1450.093 979.598 32985.789 2720.514 17853.152 NaN
std 7.632 7.635 11.814 5.730 3.057 0.024 10.589 0.032 1.619 0.017 0.000 11.814 5.730 7.627 209269.170 210649.306 217451.492 201412.180 17960.389 0.881 57940.209 3.361 nan 3224.764 1548.505 217451.492 201412.180 207573.461 23.330 24.984 25.788 20.807 4.264 0.764 14.084 1.581 nan 1.249 0.968 25.788 20.807 22.347 1.796 1.836 1.991 2.253 0.780 0.912 1.978 2.717 0.703 0.153 0.072 1.991 2.253 1.882 0.154 0.209 0.114 0.137 0.056 0.752 0.142 1.046 0.115 0.009 0.009 0.114 0.137 0.104 0.799 0.581 0.483 1.866 0.518 0.584 1.706 1.556 3.797 0.176 0.127 0.313 0.384 0.205 0.990 1.383 1.627 0.245 0.621 0.725 1.607 1.511 0.362 0.099 0.082 1.627 0.245 0.841 280846.086 275755.349 339608.074 234231.855 37982.844 0.752 110692.265 1.045 0.115 8410.770 7414.789 339608.074 234231.855 286419.114 33548.871 35084.770 63054.631 18274.150 17960.389 0.881 57940.209 3.361 nan 3224.764 1548.505 62524.788 13637.688 35035.591 NaN
min 5.500 5.500 10.000 1.000 3.028 -0.488 9.000 -1.579 1.200 0.963 1.000 10.000 1.000 5.500 5111.089 2191.660 8578.680 0.000 188.167 -5.835 1000.570 -2.291 1.001 28.028 0.000 8578.680 0.000 4774.010 2.531 1.073 12.031 0.000 0.624 -3.412 2.052 -2.007 1.062 0.359 0.043 12.031 0.000 7.891 2.000 1.375 2.000 1.000 0.000 -6.325 0.000 -2.571 1.000 0.000 0.000 2.000 1.000 2.000 1.213 1.215 1.386 1.078 0.042 -1.781 0.146 -1.937 1.083 0.033 0.018 1.386 1.078 1.232 -0.899 -0.278 1.851 -4.147 0.447 -2.270 1.333 -1.978 -13.739 0.272 0.209 2.716 0.205 1.526 4.700 4.500 5.000 3.800 0.189 -1.783 0.600 -2.059 1.136 0.100 0.100 5.000 3.800 4.700 18551.266 17354.333 23678.424 16266.723 1572.480 -1.781 5245.134 -1.937 1.083 570.656 485.633 23678.424 16266.723 19972.574 -80694.718 -89534.380 -22940.870 -121747.380 188.167 -4.766 1000.570 -2.291 -6263626.994 28.028 0.000 652.360 0.000 437.205 NaN
25% 31.000 31.000 38.000 21.000 4.183 0.000 13.000 -1.200 1.562 1.000 1.000 38.000 21.000 31.000 98426.574 98502.524 102718.180 87555.560 1622.099 -0.411 5098.060 -1.241 1.033 283.234 194.281 102718.180 87555.560 97134.513 66.889 66.824 79.674 54.888 7.827 -0.537 24.041 -1.535 1.408 1.976 1.600 79.674 54.888 66.861 5.634 5.750 6.125 4.875 0.000 0.000 0.000 -0.657 1.000 0.000 0.000 6.125 4.875 5.671 1.717 1.611 2.197 1.464 0.187 0.013 0.659 -1.300 1.435 0.047 0.040 2.197 1.464 1.831 0.741 1.586 3.029 -4.147 1.874 -1.719 6.508 -0.506 -0.791 0.688 0.490 4.147 0.241 2.194 5.779 5.000 9.500 4.400 1.547 -0.257 5.000 -1.531 2.045 0.290 0.200 9.500 4.400 6.950 124767.390 120449.955 152960.660 103614.815 14727.996 0.011 45837.772 -1.300 1.435 3529.079 2981.266 152960.660 103614.815 129278.936 1590.322 1175.781 3630.905 0.000 1622.099 0.014 5098.060 -1.241 4.785 283.234 194.281 5634.238 161.700 3086.222 NaN
50% 37.000 37.000 48.000 25.000 7.071 0.000 23.000 -1.200 1.941 1.000 1.000 48.000 25.000 37.000 180452.036 181097.578 188942.125 164980.105 4000.748 -0.169 12760.940 -1.152 1.081 608.360 460.735 188942.125 164980.105 178047.962 85.509 86.172 101.403 69.940 10.502 -0.200 31.870 -1.219 1.446 2.576 2.133 101.403 69.940 85.503 6.625 6.625 7.125 6.375 0.000 0.000 0.000 0.000 1.000 0.000 0.000 7.125 6.375 6.625 1.791 1.732 2.263 1.464 0.245 0.572 0.736 -0.931 1.494 0.050 0.046 2.263 1.464 1.864 1.155 1.693 3.029 -4.147 1.986 -1.404 7.176 0.951 -0.731 0.758 0.610 4.147 0.241 2.194 6.991 6.900 10.000 4.400 1.879 0.037 5.500 -1.349 2.222 0.324 0.200 10.000 4.400 7.200 217627.140 211784.604 263794.074 182561.056 26479.105 0.572 79349.876 -0.931 1.494 6343.333 5398.490 263794.074 182561.056 225193.836 4787.635 3869.910 10262.055 377.060 4000.748 0.169 12760.940 -1.152 14.262 608.360 460.735 13834.520 531.895 7507.452 NaN
75% 41.500 41.500 60.000 28.000 9.958 0.000 33.000 -1.200 2.400 1.000 1.000 60.000 28.000 41.500 353861.577 355579.769 375160.913 324775.182 10255.072 -0.014 32292.145 -0.887 1.160 1550.110 1165.333 375160.913 324775.182 349029.425 100.332 103.882 116.728 80.072 12.709 0.235 37.730 -0.430 1.504 3.180 2.970 116.728 80.072 98.635 7.830 7.750 8.625 7.600 1.123 0.000 2.875 0.000 1.581 0.147 0.000 8.625 7.600 7.989 1.879 1.861 2.263 1.515 0.267 0.889 0.798 -0.142 1.545 0.057 0.050 2.263 1.515 1.886 1.456 1.876 3.347 -4.147 2.323 -0.818 7.268 2.331 -0.731 0.836 0.670 4.147 0.241 2.194 7.370 7.800 10.000 4.500 2.050 0.480 5.600 -0.833 2.273 0.367 0.250 10.000 4.500 7.250 428559.341 417652.243 524727.511 360774.367 53156.855 0.885 161634.430 -0.142 1.545 12528.256 10653.806 524727.511 360774.367 441453.814 12658.299 10301.673 29347.480 1432.700 10255.072 0.411 32292.145 -0.887 42.290 1550.110 1165.333 34406.008 1777.565 18480.365 NaN
max 55.000 55.000 60.000 50.000 14.000 0.339 47.000 -0.340 29.000 1.286 1.000 60.000 50.000 55.000 1824280.256 1829948.290 1946648.790 1750000.000 247412.944 4.766 800000.000 35.566 inf 55361.295 18729.210 1946648.790 1750000.000 1810873.690 239.046 244.185 262.205 200.684 41.642 4.772 138.591 22.838 inf 17.406 8.342 262.205 200.684 231.445 16.500 16.500 37.500 16.500 9.546 4.690 30.750 40.000 8.527 3.667 0.500 37.500 16.500 22.125 2.195 2.208 2.263 2.089 0.428 2.124 1.185 4.942 2.099 0.081 0.078 2.263 2.089 2.176 3.548 3.359 5.132 2.987 2.848 1.125 8.467 5.913 25.055 1.342 1.097 5.132 2.987 3.654 9.138 9.400 10.000 8.200 2.383 2.561 5.600 7.964 2.273 0.611 0.500 10.000 8.200 9.100 2428423.859 2472630.042 2906252.535 1880863.864 334417.980 2.124 1025388.671 4.942 2.099 79997.815 68290.818 2906252.535 1880863.864 2393558.199 761089.599 776513.580 823522.560 495424.890 247412.944 5.835 800000.000 35.566 inf 55361.295 18729.210 823522.560 495424.890 659473.725 NaN


Join Two Column test_Merge dt 2 And test_final dt 1

In [279]:
test_train_data_1=test_final_dt_1.merge(test_merge_dt_2, left_on="CustomerID", right_on='CustomerID')
In [280]:
test_train_data_1.fillna(test_train_data_1['remaining_outstanding_maxtoMin'].median(),inplace=True)
In [281]:
test_train_data_1.isna().sum()
Out[281]:
Current_Instalment_Sequence_mean                  0
Current_Instalment_Sequence_median                0
Current_Instalment_Sequence_max                   0
Current_Instalment_Sequence_min                   0
Current_Instalment_Sequence_std                   0
Current_Instalment_Sequence_skew                  0
Current_Instalment_Sequence_range                 0
Current_Instalment_Sequence_kurtosis              0
Current_Instalment_Sequence_maxtoMin              0
Current_Instalment_Sequence_meanAD                0
Current_Instalment_Sequence_mad                   0
Current_Instalment_Sequence_abs_max               0
Current_Instalment_Sequence_abs_min               0
Current_Instalment_Sequence_abs_avg               0
Current_Outstanding_mean                          0
Current_Outstanding_median                        0
Current_Outstanding_max                           0
Current_Outstanding_min                           0
Current_Outstanding_std                           0
Current_Outstanding_skew                          0
Current_Outstanding_range                         0
Current_Outstanding_kurtosis                      0
Current_Outstanding_maxtoMin                      0
Current_Outstanding_meanAD                        0
Current_Outstanding_mad                           0
Current_Outstanding_abs_max                       0
Current_Outstanding_abs_min                       0
Current_Outstanding_abs_avg                       0
Current_Loan_to_Appraisedvalu_Percent_mean        0
Current_Loan_to_Appraisedvalu_Percent_median      0
Current_Loan_to_Appraisedvalu_Percent_max         0
Current_Loan_to_Appraisedvalu_Percent_min         0
Current_Loan_to_Appraisedvalu_Percent_std         0
Current_Loan_to_Appraisedvalu_Percent_skew        0
Current_Loan_to_Appraisedvalu_Percent_range       0
Current_Loan_to_Appraisedvalu_Percent_kurtosis    0
Current_Loan_to_Appraisedvalu_Percent_maxtoMin    0
Current_Loan_to_Appraisedvalu_Percent_meanAD      0
Current_Loan_to_Appraisedvalu_Percent_mad         0
Current_Loan_to_Appraisedvalu_Percent_abs_max     0
Current_Loan_to_Appraisedvalu_Percent_abs_min     0
Current_Loan_to_Appraisedvalu_Percent_abs_avg     0
CurrentInterestrate_mean                          0
CurrentInterestrate_median                        0
CurrentInterestrate_max                           0
CurrentInterestrate_min                           0
CurrentInterestrate_std                           0
CurrentInterestrate_skew                          0
CurrentInterestrate_range                         0
CurrentInterestrate_kurtosis                      0
CurrentInterestrate_maxtoMin                      0
CurrentInterestrate_meanAD                        0
CurrentInterestrate_mad                           0
CurrentInterestrate_abs_max                       0
CurrentInterestrate_abs_min                       0
CurrentInterestrate_abs_avg                       0
RealEstate_Current_Inflation_mean                 0
RealEstate_Current_Inflation_median               0
RealEstate_Current_Inflation_max                  0
RealEstate_Current_Inflation_min                  0
RealEstate_Current_Inflation_std                  0
RealEstate_Current_Inflation_skew                 0
RealEstate_Current_Inflation_range                0
RealEstate_Current_Inflation_kurtosis             0
RealEstate_Current_Inflation_maxtoMin             0
RealEstate_Current_Inflation_meanAD               0
RealEstate_Current_Inflation_mad                  0
RealEstate_Current_Inflation_abs_max              0
RealEstate_Current_Inflation_abs_min              0
RealEstate_Current_Inflation_abs_avg              0
GDP_mean                                          0
GDP_median                                        0
GDP_max                                           0
GDP_min                                           0
GDP_std                                           0
GDP_skew                                          0
GDP_range                                         0
GDP_kurtosis                                      0
GDP_maxtoMin                                      0
GDP_meanAD                                        0
GDP_mad                                           0
GDP_abs_max                                       0
GDP_abs_min                                       0
GDP_abs_avg                                       0
UnemploymentRate_mean                             0
UnemploymentRate_median                           0
UnemploymentRate_max                              0
UnemploymentRate_min                              0
UnemploymentRate_std                              0
UnemploymentRate_skew                             0
UnemploymentRate_range                            0
UnemploymentRate_kurtosis                         0
UnemploymentRate_maxtoMin                         0
UnemploymentRate_meanAD                           0
UnemploymentRate_mad                              0
UnemploymentRate_abs_max                          0
UnemploymentRate_abs_min                          0
UnemploymentRate_abs_avg                          0
current_Appraisal_value_mean                      0
current_Appraisal_value_median                    0
current_Appraisal_value_max                       0
current_Appraisal_value_min                       0
current_Appraisal_value_std                       0
current_Appraisal_value_skew                      0
current_Appraisal_value_range                     0
current_Appraisal_value_kurtosis                  0
current_Appraisal_value_maxtoMin                  0
current_Appraisal_value_meanAD                    0
current_Appraisal_value_mad                       0
current_Appraisal_value_abs_max                   0
current_Appraisal_value_abs_min                   0
current_Appraisal_value_abs_avg                   0
remaining_outstanding_mean                        0
remaining_outstanding_median                      0
remaining_outstanding_max                         0
remaining_outstanding_min                         0
remaining_outstanding_std                         0
remaining_outstanding_skew                        0
remaining_outstanding_range                       0
remaining_outstanding_kurtosis                    0
remaining_outstanding_maxtoMin                    0
remaining_outstanding_meanAD                      0
remaining_outstanding_mad                         0
remaining_outstanding_abs_max                     0
remaining_outstanding_abs_min                     0
remaining_outstanding_abs_avg                     0
CustomerID                                        0
Starting_Instalment                               0
Maturity_Period                                   0
Asset_type                                        0
Urban_Development                                 0
Villa_House                                       0
Investment_SelfOccupied                           0
Starting_outstanding                              0
Starting_Loan_to_Appraisedvalu_Percent            0
StartingInterestrate                              0
RealEstate_Starting_Inflation                     0
age                                               0
Salary                                            0
ProfessionalLicensure                             0
UtilitySpending                                   0
eCommerceAccount                                  0
SocialMediaAccount                                0
Appraisal_value                                   0
NoOfProperties                                    0
dtype: int64
In [282]:
test_train_data_1[['eCommerceAccount','ProfessionalLicensure']]=test_train_data_1[['eCommerceAccount','ProfessionalLicensure']].astype('object')
In [283]:
#num1_cols = list(train_data_1.select_dtypes(include=['float64','float32','int32','int64']).columns)
#cat1_cols = list(train_data_1.select_dtypes(include=['object']).columns)
categorical_features =test_train_data_1.select_dtypes(include=['object']).columns
numerical_features =test_train_data_1.select_dtypes(include=['float64','float32','int32','int64']).columns


Feature Engg On Payment Status Column

In [284]:
new=test_merge_dt[['CustomerID','Current_Instalment_Sequence', 'Current_Outstanding','Current_Loan_to_Appraisedvalu_Percent','CurrentInterestrate',
                    'RealEstate_Current_Inflation','GDP', 'UnemploymentRate','current_Appraisal_value','remaining_outstanding','Payment_Status']]
In [285]:
new.Payment_Status.unique()
Out[285]:
array(['Non-Payoff/Non-Default', 'Default', 'Payoff'], dtype=object)
In [286]:
payment={'Non-Payoff/Non-Default': 2,'Payoff':4,'Default':0}

new['Payment_Status']=new['Payment_Status'].map(payment)
create payment_total_score
In [287]:
new['payment_total_score'] =[4]*new.shape[0]
create payment dataframe
In [288]:
payment_data=pd.DataFrame({'CustomerID':new.CustomerID,'Payment_Status':new.Payment_Status,'payment_total_score':new.payment_total_score})
define function to find z score of payment status
In [289]:
def payment_eng1(data):
    df = pd.DataFrame()
    for col in data.columns:
        if col in ['CustomerID']:
            continue
        df[col + '_count'] = data.groupby(['CustomerID'])[col].count()
        df[col + '_sum'] = data.groupby(['CustomerID'])[col].sum()
    return df
In [290]:
payment_data=payment_eng1(data=payment_data)
In [291]:
payment_data.head(5)
Out[291]:
Payment_Status_count Payment_Status_sum payment_total_score_count payment_total_score_sum
CustomerID
C41116 35 70 35 140
C41117 35 70 35 140
C41118 35 70 35 140
C41119 35 70 35 140
C41120 35 70 35 140
In [292]:
from scipy.stats import zscore
payment_percentile= payment_data.Payment_Status_sum/payment_data.payment_total_score_sum
payment_data['payment_z_score']=zscore(payment_percentile)
payment_data['payment_lenght']=payment_data.Payment_Status_count
In [293]:
print('size of train data',train_data_1.shape)
print('size of payment col',len(payment_percentile))
size of train data (11089, 147)
size of payment col 2774
In [294]:
payment_data2=payment_data[['payment_z_score','payment_lenght']]
In [295]:
test_train_data_1=test_train_data_1.merge(payment_data2, left_on="CustomerID", right_on='CustomerID')
In [296]:
test_train_data_1.head(5)
Out[296]:
Current_Instalment_Sequence_mean Current_Instalment_Sequence_median Current_Instalment_Sequence_max Current_Instalment_Sequence_min Current_Instalment_Sequence_std Current_Instalment_Sequence_skew Current_Instalment_Sequence_range Current_Instalment_Sequence_kurtosis Current_Instalment_Sequence_maxtoMin Current_Instalment_Sequence_meanAD Current_Instalment_Sequence_mad Current_Instalment_Sequence_abs_max Current_Instalment_Sequence_abs_min Current_Instalment_Sequence_abs_avg Current_Outstanding_mean Current_Outstanding_median Current_Outstanding_max Current_Outstanding_min Current_Outstanding_std Current_Outstanding_skew Current_Outstanding_range Current_Outstanding_kurtosis Current_Outstanding_maxtoMin Current_Outstanding_meanAD Current_Outstanding_mad Current_Outstanding_abs_max Current_Outstanding_abs_min Current_Outstanding_abs_avg Current_Loan_to_Appraisedvalu_Percent_mean Current_Loan_to_Appraisedvalu_Percent_median Current_Loan_to_Appraisedvalu_Percent_max Current_Loan_to_Appraisedvalu_Percent_min Current_Loan_to_Appraisedvalu_Percent_std Current_Loan_to_Appraisedvalu_Percent_skew Current_Loan_to_Appraisedvalu_Percent_range Current_Loan_to_Appraisedvalu_Percent_kurtosis Current_Loan_to_Appraisedvalu_Percent_maxtoMin Current_Loan_to_Appraisedvalu_Percent_meanAD Current_Loan_to_Appraisedvalu_Percent_mad Current_Loan_to_Appraisedvalu_Percent_abs_max Current_Loan_to_Appraisedvalu_Percent_abs_min Current_Loan_to_Appraisedvalu_Percent_abs_avg CurrentInterestrate_mean CurrentInterestrate_median CurrentInterestrate_max CurrentInterestrate_min CurrentInterestrate_std CurrentInterestrate_skew CurrentInterestrate_range CurrentInterestrate_kurtosis CurrentInterestrate_maxtoMin CurrentInterestrate_meanAD CurrentInterestrate_mad CurrentInterestrate_abs_max CurrentInterestrate_abs_min CurrentInterestrate_abs_avg RealEstate_Current_Inflation_mean RealEstate_Current_Inflation_median RealEstate_Current_Inflation_max RealEstate_Current_Inflation_min RealEstate_Current_Inflation_std RealEstate_Current_Inflation_skew RealEstate_Current_Inflation_range RealEstate_Current_Inflation_kurtosis RealEstate_Current_Inflation_maxtoMin RealEstate_Current_Inflation_meanAD RealEstate_Current_Inflation_mad RealEstate_Current_Inflation_abs_max RealEstate_Current_Inflation_abs_min RealEstate_Current_Inflation_abs_avg GDP_mean GDP_median GDP_max GDP_min GDP_std GDP_skew GDP_range GDP_kurtosis GDP_maxtoMin GDP_meanAD GDP_mad GDP_abs_max GDP_abs_min GDP_abs_avg UnemploymentRate_mean UnemploymentRate_median UnemploymentRate_max UnemploymentRate_min UnemploymentRate_std UnemploymentRate_skew UnemploymentRate_range UnemploymentRate_kurtosis UnemploymentRate_maxtoMin UnemploymentRate_meanAD UnemploymentRate_mad UnemploymentRate_abs_max UnemploymentRate_abs_min UnemploymentRate_abs_avg current_Appraisal_value_mean current_Appraisal_value_median current_Appraisal_value_max current_Appraisal_value_min current_Appraisal_value_std current_Appraisal_value_skew current_Appraisal_value_range current_Appraisal_value_kurtosis current_Appraisal_value_maxtoMin current_Appraisal_value_meanAD current_Appraisal_value_mad current_Appraisal_value_abs_max current_Appraisal_value_abs_min current_Appraisal_value_abs_avg remaining_outstanding_mean remaining_outstanding_median remaining_outstanding_max remaining_outstanding_min remaining_outstanding_std remaining_outstanding_skew remaining_outstanding_range remaining_outstanding_kurtosis remaining_outstanding_maxtoMin remaining_outstanding_meanAD remaining_outstanding_mad remaining_outstanding_abs_max remaining_outstanding_abs_min remaining_outstanding_abs_avg CustomerID Starting_Instalment Maturity_Period Asset_type Urban_Development Villa_House Investment_SelfOccupied Starting_outstanding Starting_Loan_to_Appraisedvalu_Percent StartingInterestrate RealEstate_Starting_Inflation age Salary ProfessionalLicensure UtilitySpending eCommerceAccount SocialMediaAccount Appraisal_value NoOfProperties payment_z_score payment_lenght
0 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 57546.403 57950.270 61031.100 52686.350 2518.907 -0.410 8344.750 -1.032 1.158 245.434 229.055 61031.100 52686.350 56858.725 41.627 43.178 49.157 33.911 5.301 -0.159 15.246 -1.645 1.450 1.191 1.097 49.157 33.911 41.534 10.107 10.500 11.000 9.250 0.685 -0.386 1.750 -1.685 1.189 0.103 0.000 11.000 9.250 10.125 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 140488.186 137726.587 179974.299 116475.479 19594.911 0.783 63498.820 -0.589 1.545 3703.646 3165.397 179974.299 116475.479 148224.889 6203.597 5799.730 11063.650 2718.900 2518.907 0.410 8344.750 -1.032 4.069 245.434 229.055 11063.650 2718.900 6891.275 C41116 25 119 No shred services No Yes Self Occupancy 63750.000 81.800 10.500 0.980 46 1876.000 1 396.162 1 Yes 77933.985 1 -0.056 35
1 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 25540.784 25924.430 29641.160 20092.000 2887.515 -0.350 9549.160 -1.082 1.475 280.858 274.935 29641.160 20092.000 24866.580 17.869 18.569 22.174 12.904 2.635 -0.319 9.270 -1.112 1.718 0.558 0.443 22.174 12.904 17.539 8.990 8.990 8.990 8.990 0.000 0.000 0.000 0.000 1.000 0.000 0.000 8.990 8.990 8.990 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 144896.970 142048.707 185622.232 120130.699 20209.836 0.783 65491.533 -0.589 1.545 3819.873 3264.733 185622.232 120130.699 152876.465 9559.216 9175.570 15008.000 5458.840 2887.515 0.350 9549.160 -1.082 2.749 280.858 274.935 15008.000 5458.840 10233.420 C41117 25 106 No shred services No Yes Self Occupancy 35100.000 54.600 8.990 0.784 49 1261.000 0 195.605 0 No 64285.714 1 -0.056 35
2 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 88553.436 88748.400 92310.090 83836.020 2439.412 -0.220 8474.070 -1.099 1.101 301.991 277.810 92310.090 83836.020 88073.055 91.750 94.125 107.229 73.448 11.394 -0.258 33.781 -1.465 1.460 2.551 2.251 107.229 73.448 90.339 8.211 8.200 10.875 6.700 1.128 0.641 4.175 0.600 1.623 0.201 0.000 10.875 6.700 8.787 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 98106.412 96177.918 125680.552 81337.738 13683.616 0.783 44342.813 -0.589 1.545 2586.348 2210.476 125680.552 81337.738 103509.145 4246.564 4051.600 8963.980 489.910 2439.412 0.220 8474.070 -1.099 18.297 301.991 277.810 8963.980 489.910 4726.945 C41118 25 141 No shred services No Yes Self Occupancy 92800.000 80.000 8.200 2.089 38 1026.000 1 212.937 0 Yes 116000.000 2 -0.056 35
3 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 160142.145 160372.280 164438.800 155180.900 2745.590 -0.212 9257.900 -1.090 1.060 272.291 256.810 164438.800 155180.900 159809.850 102.933 104.481 121.111 81.130 12.939 -0.318 39.981 -1.335 1.493 2.855 2.627 121.111 81.130 101.120 7.329 7.500 7.500 6.000 0.484 -2.535 1.500 4.689 1.250 0.044 0.000 7.500 6.000 6.750 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 158217.589 155107.480 202686.791 131174.513 22067.760 0.783 71512.278 -0.589 1.545 4171.040 3564.866 202686.791 131174.513 166930.652 4857.855 4627.720 9819.100 561.200 2745.590 0.212 9257.900 -1.090 17.497 272.291 256.810 9819.100 561.200 5190.150 C41119 25 142 No shred services No Yes Self Occupancy 165000.000 88.200 7.500 2.089 36 1384.000 0 396.162 1 Yes 187074.830 1 -0.056 35
4 42.257 42.000 60 25 10.587 0.057 35 -1.190 2.400 1.029 1.000 60 25 42.500 63062.625 63384.120 66606.840 58520.110 2496.772 -0.312 8086.730 -1.202 1.138 237.845 201.495 66606.840 58520.110 62563.475 92.498 95.859 109.070 75.042 11.657 -0.172 34.028 -1.611 1.453 2.619 2.317 109.070 75.042 92.056 7.290 6.975 9.975 6.975 0.676 2.843 3.000 8.162 1.430 0.176 0.000 9.975 6.975 8.475 1.766 1.732 2.263 1.464 0.246 0.783 0.798 -0.589 1.545 0.047 0.040 2.263 1.464 1.864 1.264 1.717 3.029 -4.147 1.907 -1.805 7.176 2.474 -0.731 0.769 0.622 4.147 0.241 2.194 7.163 7.300 10.000 4.400 1.908 -0.062 5.600 -1.502 2.273 0.312 0.200 10.000 4.400 7.200 69285.776 67923.814 88759.484 57443.221 9663.792 0.783 31316.263 -0.589 1.545 1826.559 1561.106 88759.484 57443.221 73101.353 3937.375 3615.880 8479.890 393.160 2496.772 0.312 8086.730 -1.202 21.569 237.845 201.495 8479.890 393.160 4436.525 C41120 25 142 No shred services No Yes Self Occupancy 67000.000 78.800 6.975 2.168 40 1913.000 1 220.365 1 Yes 85025.381 2 -0.056 35

Skewed Transformation

In [299]:
#log transform skewed numeric features:
numeric_feats = test_train_data_1.dtypes[test_train_data_1.dtypes != "object"].index

skewed_feats = test_train_data_1[numeric_feats].apply(lambda x: skew(x.dropna())) #compute skewness
skewed_feats = skewed_feats[skewed_feats > 0.75]
skewed_feats = skewed_feats.index

test_train_data_1[skewed_feats] = np.log1p(test_train_data_1[skewed_feats])
In [300]:
test_train_data_1[numeric_feats] = test_train_data_1[numeric_feats].apply(lambda x:pd.to_numeric(x)) #
In [301]:
test_train_data_1.drop(axis=1,columns='CustomerID',inplace=True)
In [302]:
test_df=test_train_data_1
In [304]:
test_df.head(5)
Out[304]:
Current_Instalment_Sequence_mean Current_Instalment_Sequence_median Current_Instalment_Sequence_max Current_Instalment_Sequence_min Current_Instalment_Sequence_std Current_Instalment_Sequence_skew Current_Instalment_Sequence_range Current_Instalment_Sequence_kurtosis Current_Instalment_Sequence_maxtoMin Current_Instalment_Sequence_meanAD Current_Instalment_Sequence_mad Current_Instalment_Sequence_abs_max Current_Instalment_Sequence_abs_min Current_Instalment_Sequence_abs_avg Current_Outstanding_mean Current_Outstanding_median Current_Outstanding_max Current_Outstanding_min Current_Outstanding_std Current_Outstanding_skew Current_Outstanding_range Current_Outstanding_kurtosis Current_Outstanding_maxtoMin Current_Outstanding_meanAD Current_Outstanding_mad Current_Outstanding_abs_max Current_Outstanding_abs_min Current_Outstanding_abs_avg Current_Loan_to_Appraisedvalu_Percent_mean Current_Loan_to_Appraisedvalu_Percent_median Current_Loan_to_Appraisedvalu_Percent_max Current_Loan_to_Appraisedvalu_Percent_min Current_Loan_to_Appraisedvalu_Percent_std Current_Loan_to_Appraisedvalu_Percent_skew Current_Loan_to_Appraisedvalu_Percent_range Current_Loan_to_Appraisedvalu_Percent_kurtosis Current_Loan_to_Appraisedvalu_Percent_maxtoMin Current_Loan_to_Appraisedvalu_Percent_meanAD Current_Loan_to_Appraisedvalu_Percent_mad Current_Loan_to_Appraisedvalu_Percent_abs_max Current_Loan_to_Appraisedvalu_Percent_abs_min Current_Loan_to_Appraisedvalu_Percent_abs_avg CurrentInterestrate_mean CurrentInterestrate_median CurrentInterestrate_max CurrentInterestrate_min CurrentInterestrate_std CurrentInterestrate_skew CurrentInterestrate_range CurrentInterestrate_kurtosis CurrentInterestrate_maxtoMin CurrentInterestrate_meanAD CurrentInterestrate_mad CurrentInterestrate_abs_max CurrentInterestrate_abs_min CurrentInterestrate_abs_avg RealEstate_Current_Inflation_mean RealEstate_Current_Inflation_median RealEstate_Current_Inflation_max RealEstate_Current_Inflation_min RealEstate_Current_Inflation_std RealEstate_Current_Inflation_skew RealEstate_Current_Inflation_range RealEstate_Current_Inflation_kurtosis RealEstate_Current_Inflation_maxtoMin RealEstate_Current_Inflation_meanAD RealEstate_Current_Inflation_mad RealEstate_Current_Inflation_abs_max RealEstate_Current_Inflation_abs_min RealEstate_Current_Inflation_abs_avg GDP_mean GDP_median GDP_max GDP_min GDP_std GDP_skew GDP_range GDP_kurtosis GDP_maxtoMin GDP_meanAD GDP_mad GDP_abs_max GDP_abs_min GDP_abs_avg UnemploymentRate_mean UnemploymentRate_median UnemploymentRate_max UnemploymentRate_min UnemploymentRate_std UnemploymentRate_skew UnemploymentRate_range UnemploymentRate_kurtosis UnemploymentRate_maxtoMin UnemploymentRate_meanAD UnemploymentRate_mad UnemploymentRate_abs_max UnemploymentRate_abs_min UnemploymentRate_abs_avg current_Appraisal_value_mean current_Appraisal_value_median current_Appraisal_value_max current_Appraisal_value_min current_Appraisal_value_std current_Appraisal_value_skew current_Appraisal_value_range current_Appraisal_value_kurtosis current_Appraisal_value_maxtoMin current_Appraisal_value_meanAD current_Appraisal_value_mad current_Appraisal_value_abs_max current_Appraisal_value_abs_min current_Appraisal_value_abs_avg remaining_outstanding_mean remaining_outstanding_median remaining_outstanding_max remaining_outstanding_min remaining_outstanding_std remaining_outstanding_skew remaining_outstanding_range remaining_outstanding_kurtosis remaining_outstanding_maxtoMin remaining_outstanding_meanAD remaining_outstanding_mad remaining_outstanding_abs_max remaining_outstanding_abs_min remaining_outstanding_abs_avg Starting_Instalment Maturity_Period Asset_type Urban_Development Villa_House Investment_SelfOccupied Starting_outstanding Starting_Loan_to_Appraisedvalu_Percent StartingInterestrate RealEstate_Starting_Inflation age Salary ProfessionalLicensure UtilitySpending eCommerceAccount SocialMediaAccount Appraisal_value NoOfProperties payment_z_score payment_lenght
0 42.257 42.000 60 25 10.587 0.057 35 nan 1.224 0.708 1.000 60 25 42.500 10.960 10.967 11.019 10.872 7.832 -0.410 9.030 nan 1.158 5.507 5.438 11.019 10.872 10.948 41.627 43.178 49.157 33.911 1.841 -0.159 2.788 nan 1.450 0.784 1.097 49.157 33.911 41.534 2.408 10.500 2.485 9.250 0.522 -0.386 1.012 nan 0.784 0.098 0.000 2.485 9.250 2.409 1.766 1.005 2.263 0.902 0.246 0.783 0.798 -0.889 1.545 0.046 0.040 2.263 0.902 1.864 1.264 1.717 1.394 nan 1.907 nan 7.176 2.474 -1.311 0.769 0.622 4.147 0.216 1.161 7.163 7.300 10.000 1.686 1.908 -0.064 5.600 nan 2.273 0.312 0.182 10.000 1.686 7.200 11.853 11.833 12.101 11.665 9.883 0.783 11.059 -0.889 1.545 8.217 8.060 12.101 11.665 11.906 8.733 8.666 9.312 7.908 7.832 0.344 9.030 nan 4.069 5.507 5.438 9.312 7.908 8.838 25 119 No shred services No Yes Self Occupancy 11.063 81.800 10.500 0.980 3.850 7.537 1 396.162 1 Yes 11.264 0.693 -0.056 35
1 42.257 42.000 60 25 10.587 0.057 35 nan 1.224 0.708 1.000 60 25 42.500 10.148 10.163 10.297 9.908 7.968 -0.350 9.164 nan 1.475 5.641 5.620 10.297 9.908 10.121 17.869 18.569 22.174 12.904 1.291 -0.319 2.329 nan 1.718 0.443 0.443 22.174 12.904 17.539 2.302 8.990 2.302 8.990 0.000 0.000 0.000 0.000 0.693 0.000 0.000 2.302 8.990 2.302 1.766 1.005 2.263 0.902 0.246 0.783 0.798 -0.889 1.545 0.046 0.040 2.263 0.902 1.864 1.264 1.717 1.394 nan 1.907 nan 7.176 2.474 -1.311 0.769 0.622 4.147 0.216 1.161 7.163 7.300 10.000 1.686 1.908 -0.064 5.600 nan 2.273 0.312 0.182 10.000 1.686 7.200 11.884 11.864 12.131 11.696 9.914 0.783 11.090 -0.889 1.545 8.248 8.091 12.131 11.696 11.937 9.165 9.124 9.616 8.605 7.968 0.300 9.164 nan 2.749 5.641 5.620 9.616 8.605 9.234 25 106 No shred services No Yes Self Occupancy 10.466 54.600 8.990 0.784 3.912 7.140 0 195.605 0 No 11.071 0.693 -0.056 35
2 42.257 42.000 60 25 10.587 0.057 35 nan 1.224 0.708 1.000 60 25 42.500 11.391 11.394 11.433 11.337 7.800 -0.220 9.045 nan 1.101 5.714 5.631 11.433 11.337 11.386 91.750 94.125 107.229 73.448 2.517 -0.258 3.549 nan 1.460 1.267 2.251 107.229 73.448 90.339 2.220 8.200 2.474 6.700 0.755 0.641 1.644 0.470 0.964 0.184 0.000 2.474 6.700 2.281 1.766 1.005 2.263 0.902 0.246 0.783 0.798 -0.889 1.545 0.046 0.040 2.263 0.902 1.864 1.264 1.717 1.394 nan 1.907 nan 7.176 2.474 -1.311 0.769 0.622 4.147 0.216 1.161 7.163 7.300 10.000 1.686 1.908 -0.064 5.600 nan 2.273 0.312 0.182 10.000 1.686 7.200 11.494 11.474 11.742 11.306 9.524 0.783 10.700 -0.889 1.545 7.858 7.701 11.742 11.306 11.547 8.354 8.307 9.101 6.196 7.800 0.199 9.045 nan 18.297 5.714 5.631 9.101 6.196 8.461 25 141 No shred services No Yes Self Occupancy 11.438 80.000 8.200 2.089 3.664 6.934 1 212.937 0 Yes 11.661 1.099 -0.056 35
3 42.257 42.000 60 25 10.587 0.057 35 nan 1.224 0.708 1.000 60 25 42.500 11.984 11.985 12.010 11.952 7.918 -0.212 9.133 nan 1.060 5.611 5.552 12.010 11.952 11.982 102.933 104.481 121.111 81.130 2.635 -0.318 3.713 nan 1.493 1.349 2.627 121.111 81.130 101.120 2.120 7.500 2.140 6.000 0.395 -2.535 0.916 1.739 0.811 0.043 0.000 2.140 6.000 2.048 1.766 1.005 2.263 0.902 0.246 0.783 0.798 -0.889 1.545 0.046 0.040 2.263 0.902 1.864 1.264 1.717 1.394 nan 1.907 nan 7.176 2.474 -1.311 0.769 0.622 4.147 0.216 1.161 7.163 7.300 10.000 1.686 1.908 -0.064 5.600 nan 2.273 0.312 0.182 10.000 1.686 7.200 11.972 11.952 12.219 11.784 10.002 0.783 11.178 -0.889 1.545 8.336 8.179 12.219 11.784 12.025 8.489 8.440 9.192 6.332 7.918 0.192 9.133 nan 17.497 5.611 5.552 9.192 6.332 8.555 25 142 No shred services No Yes Self Occupancy 12.014 88.200 7.500 2.089 3.611 7.233 0 396.162 1 Yes 12.139 0.693 -0.056 35
4 42.257 42.000 60 25 10.587 0.057 35 nan 1.224 0.708 1.000 60 25 42.500 11.052 11.057 11.107 10.977 7.823 -0.312 8.998 nan 1.138 5.476 5.311 11.107 10.977 11.044 92.498 95.859 109.070 75.042 2.538 -0.172 3.556 nan 1.453 1.286 2.317 109.070 75.042 92.056 2.115 6.975 2.396 6.975 0.517 2.843 1.386 2.215 0.888 0.163 0.000 2.396 6.975 2.249 1.766 1.005 2.263 0.902 0.246 0.783 0.798 -0.889 1.545 0.046 0.040 2.263 0.902 1.864 1.264 1.717 1.394 nan 1.907 nan 7.176 2.474 -1.311 0.769 0.622 4.147 0.216 1.161 7.163 7.300 10.000 1.686 1.908 -0.064 5.600 nan 2.273 0.312 0.182 10.000 1.686 7.200 11.146 11.126 11.394 10.959 9.176 0.783 10.352 -0.889 1.545 7.511 7.354 11.394 10.959 11.200 8.279 8.193 9.046 5.977 7.823 0.271 8.998 nan 21.569 5.476 5.311 9.046 5.977 8.398 25 142 No shred services No Yes Self Occupancy 11.112 78.800 6.975 2.168 3.714 7.557 1 220.365 1 Yes 11.351 1.099 -0.056 35
In [306]:
test_df = pd.get_dummies(test_df,drop_first=True)
test_df = test_df.fillna(test_df.mean())
In [307]:
test_df.shape
Out[307]:
(2774, 146)

prediction

In [308]:
submission1= pd.read_csv('samplesubmission-1557545918238.csv')
test_dtt=test_df[most_relevant_features]
submission1.head(5)
predict_lmTest_ss = xgb_model6.predict(test_dtt)
predict_test=np.exp(predict_lmTest_ss)
submission1['CreditRiskScore'] = predict_test
submission1.head()
submission1.to_csv('xgb22.csv', index=False)

Conclusion

Model Validation Test
XG Boost5 0.13 113.55
MLR 0.15
Elastic net 0.16
XG Boost4 0.1355 117.35
CAT Boost 0.13 168.55
KNN 0.16 119.79
Stacking 0.1291 116.6
XG boost(pca) 0.1311


XG Boost perform better than others on this dataset